Example no. 1
    def test_convert_padding_direction(self):
        pad = 1
        left_pad = torch.LongTensor([
            [2, 3, 4, 5, 6],
            [1, 7, 8, 9, 10],
            [1, 1, 1, 11, 12],
        ])
        right_pad = torch.LongTensor([
            [2, 3, 4, 5, 6],
            [7, 8, 9, 10, 1],
            [11, 12, 1, 1, 1],
        ])

        self.assertAlmostEqual(
            right_pad,
            utils.convert_padding_direction(
                left_pad,
                pad,
                left_to_right=True,
            ),
        )
        self.assertAlmostEqual(
            left_pad,
            utils.convert_padding_direction(
                right_pad,
                pad,
                right_to_left=True,
            ),
        )
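A note on what `utils.convert_padding_direction` computes, for readers of the test above: it moves each row's pad tokens from one side to the other while keeping the non-pad tokens in order. Below is a minimal, self-contained sketch of the same behaviour (not fairseq's vectorized implementation); it assumes the pad index never occurs as a real token.

import torch

def shift_left_pad_to_right(tokens, pad):
    # Move each row's padding from the left side to the right side,
    # keeping the non-pad tokens in their original order.
    out = torch.full_like(tokens, pad)
    for i, row in enumerate(tokens):
        keep = row[row != pad]
        out[i, :keep.numel()] = keep
    return out

left_pad = torch.LongTensor([
    [2, 3, 4, 5, 6],
    [1, 7, 8, 9, 10],
    [1, 1, 1, 11, 12],
])
print(shift_left_pad_to_right(left_pad, pad=1))
# tensor([[ 2,  3,  4,  5,  6],
#         [ 7,  8,  9, 10,  1],
#         [11, 12,  1,  1,  1]])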
Example no. 2
    def forward(self, src_tokens, src_lengths):
        # The inputs to the ``forward()`` function are determined by the
        # Task, and in particular the ``'net_input'`` key in each
        # mini-batch. We discuss Tasks in the next tutorial, but for now just
        # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
        # has shape `(batch)`.

        # Note that the source is typically padded on the left. This can be
        # configured by adding the `--left-pad-source "False"` command-line
        # argument, but here we'll make the Encoder handle either kind of
        # padding by converting everything to be right-padded.

        if self.args.left_pad_source:
            # Convert left-padding to right-padding.
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                padding_idx=self.dictionary.pad(),
                left_to_right=True)

        # Return the Encoder's output. This can be any object and will be
        # passed directly to the Decoder.
        debug_out = self.dictionary.string(src_tokens,
                                           bpe_symbol=None,
                                           escape_unk=False)

        batch_penalties = []
        for line in debug_out.split("\n"):
            penalties = make_word_penalties_tokens(
                line=line,
                vocab=self.vocab_set,
                mapx=self.mapx,
                dictionary=self.dictionary,
            )
            batch_penalties.append(penalties)

        return batch_penalties
Example no. 3
    def forward(self, src_tokens, src_lengths):
        # The inputs to the ``forward()`` function are determined by the
        # Task, and in particular the ``'net_input'`` key in each
        # mini-batch. We discuss Tasks in the next tutorial, but for now just
        # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
        # has shape `(batch)`.

        # Note that the source is typically padded on the left. This can be
        # configured by adding the `--left-pad-source "False"` command-line
        # argument, but here we'll make the Encoder handle either kind of
        # padding by converting everything to be right-padded.  # i.e., make everything right-padded
        if self.args.left_pad_source:
            # Convert left-padding to right-padding.
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                padding_idx=self.dictionary.pad(),
                left_to_right=True
            )

        # Embed the source.
        x = self.embed_tokens(src_tokens)

        # Apply dropout.
        x = self.dropout(x)

        # Pack the sequence into a PackedSequence object to feed to the LSTM.
        x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)

        # Get the output from the LSTM.
        _outputs, (final_hidden, _final_cell) = self.lstm(x)
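The comments above note that `nn.utils.rnn.pack_padded_sequence` only makes sense for right-padded batches, which is why these encoders convert the padding direction first. A minimal sketch with toy shapes (not taken from the example) illustrating the pack/unpack round trip:

import torch
import torch.nn as nn

# Toy right-padded batch: two sequences of lengths 3 and 2, batch_first=True.
batch = torch.tensor([[1., 2., 3.],
                      [4., 5., 0.]]).unsqueeze(-1)     # (B=2, T=3, C=1)
lengths = torch.tensor([3, 2])

# pack_padded_sequence reads each row from the left up to its length, so the
# padding has to sit on the right.
packed = nn.utils.rnn.pack_padded_sequence(batch, lengths, batch_first=True)
unpacked, out_lengths = nn.utils.rnn.pad_packed_sequence(packed, batch_first=True)
assert torch.equal(unpacked, batch) and out_lengths.tolist() == [3, 2]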
Example no. 4
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                out = outs.view(self.num_layers, 2, bsz,
                                -1).transpose(1, 2).contiguous()
                return out.view(self.num_layers, bsz, -1)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        # saving the output to a file
        if self._encoder_states_dir:
            self._save_encoder_state(x, "batch-%s.pt")

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
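The `combine_bidir` helper above folds the `(2 * num_layers, batch, hidden)` final states of a bidirectional LSTM into `(num_layers, batch, 2 * hidden)`, concatenating each layer's forward and backward state. A shape-only sketch with made-up sizes:

import torch

num_layers, bsz, hidden_size = 2, 3, 4
# Final hidden states of a bidirectional LSTM: one slice per (layer, direction).
outs = torch.randn(2 * num_layers, bsz, hidden_size)

out = outs.view(num_layers, 2, bsz, -1).transpose(1, 2).contiguous()
combined = out.view(num_layers, bsz, -1)
assert combined.shape == (num_layers, bsz, 2 * hidden_size)
# combined[l, b] is the forward state of layer l concatenated with its
# backward state, for batch element b.
assert torch.equal(combined[0, 0], torch.cat([outs[0, 0], outs[1, 0]]))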
Example no. 5
 def _make_sample(self, batch=None, xs=None, ys=None):
     """Generate a sample object that Fairseq expects."""
     # add extra info to samples
     if batch is None and xs is None:
         raise ValueError("Must supply either batch or xs")
     if batch is None and ys is None:
         raise ValueError("Must supply either batch or ys")
     if xs is None:
         xs = batch.text_vec
     if ys is None:
         ys = batch.label_vec
     repadded = convert_padding_direction(xs,
                                          self.dict.pad(),
                                          right_to_left=True)
     sample = {}
     sample["id"] = torch.arange(len(xs) - 1)
     sample["net_input"] = {
         "src_tokens": repadded,
         "src_lengths": self._seq_length(xs),
     }
     if ys is not None:
         sample["target"] = ys
         sample["ntokens"] = sum(self._seq_length(ys)).item()
         sample["net_input"]["prev_output_tokens"] = self._right_shifted_ys(
             ys)
     return sample
Example no. 6
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.data.new(*state_size).zero_()
        c0 = x.data.new(*state_size).zero_()
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                return torch.cat([
                    torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(
                        1, bsz, self.output_units)
                    for i in range(self.num_layers)
                ],
                                 dim=0)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
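Several of these encoders build `encoder_padding_mask` as `src_tokens.eq(self.padding_idx).t()`, i.e. a `(src_len, batch)` boolean mask that is True at pad positions; the `.any()` check then returns `None` when the batch contains no padding at all. A tiny sketch with hypothetical token values:

import torch

padding_idx = 1
src_tokens = torch.tensor([[5, 6, 7],
                           [8, 9, 1]])                  # B x T, right-padded
encoder_padding_mask = src_tokens.eq(padding_idx).t()   # T x B
print(encoder_padding_mask)
# tensor([[False, False],
#         [False, False],
#         [False,  True]])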
Example no. 7
    def forward(self, src_tokens, src_lengths):
        # The inputs to the ``forward()`` function are determined by the
        # Task, and in particular the ``'net_input'`` key in each
        # mini-batch. We discuss Tasks in the next tutorial, but for now just
        # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
        # has shape `(batch)`.

        # Note that the source is typically padded on the left. This can be
        # configured by adding the `--left-pad-source "False"` command-line
        # argument, but here we'll make the Encoder handle either kind of
        # padding by converting everything to be right-padded.
        if self.args.left_pad_source:
            # Convert left-padding to right-padding.
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                padding_idx=self.dictionary.pad(),
                left_to_right=True
            )

        bsz, seqlen = src_tokens.size()

        # Embed the source.
        x = self.embed_tokens(src_tokens)

        # Apply dropout.
        x = self.dropout(x)

        # Pack the sequence into a PackedSequence object to feed to the LSTM.
        x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)

        # Get the output from the LSTM.
        _outputs, (_final_hidden, _final_cell) = self.lstm(x)

        x, _ = nn.utils.rnn.pad_packed_sequence(_outputs, padding_value=0)

        assert list(x.size()) == [seqlen, bsz, 2*self.hidden_dim]

        final_hidden = torch.mean(x, dim=0)

        assert list(final_hidden.size()) == [bsz, 2*self.hidden_dim]

        mu = self.context_to_mu(final_hidden)
        logvar = self.context_to_logvar(final_hidden)

        std = torch.exp(0.5 * logvar)
        z = torch.randn(mu.size())
        if torch.cuda.is_available():
            z = z.cuda()
        z = z * std + mu
        # Return the Encoder's output. This can be any object and will be
        # passed directly to the Decoder.
        return {
            # this will have shape `(bsz, hidden_dim)`
            'final_hidden': z,
            'logvar': logvar,
            'mu': mu
        }
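The `mu`/`logvar`/`z` block above is the standard VAE reparameterization, `z = mu + sigma * eps` with `eps ~ N(0, I)`. A minimal sketch of the same step; `torch.randn_like` is an equivalent, device-safe alternative to the explicit `.cuda()` call (the sizes below are made up):

import torch

def reparameterize(mu, logvar):
    # z = mu + sigma * eps, with eps ~ N(0, I); randn_like keeps eps on the
    # same device and dtype as mu, so no explicit .cuda() call is needed.
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(mu)
    return mu + std * eps

mu = torch.zeros(4, 8)       # (bsz, latent_dim), hypothetical sizes
logvar = torch.zeros(4, 8)
z = reparameterize(mu, logvar)
assert z.shape == mu.shape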
Example no. 8
    def forward(self, src_tokens, src_lengths):
        src_tokens = src_tokens.t()
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )
        # src_tokens: B x T
        x = F.dropout(self.embed_tokens(src_tokens),
                      p=self.dropout,
                      training=self.training)

        # pack embedded source tokens into a PackedSequence
        if not hasattr(
                self, 'lstm_state') or self.lstm_state[0].size(1) != x.size(1):
            self.lstm_state = tuple((x.new_zeros(self.recurrent_layers * 2,
                                                 x.size(1), x.size(2)),
                                     x.new_zeros(self.recurrent_layers * 2,
                                                 x.size(1), x.size(2))))
            for state in self.lstm_state:
                nn.init.normal_(state, mean=0, std=0.1)
            self.lstm_state = tuple(
                (Parameter(state, requires_grad=self.learn_initial)
                 for state in self.lstm_state))

        x, s = self.recurrent(x, self.lstm_state)

        # unpack outputs and apply dropout
        x = F.dropout(x, p=self.dropout, training=self.training)
        encoder_padding_mask = self.create_mask(src_lengths)

        if encoder_padding_mask is not None:
            x = x.masked_fill_(
                encoder_padding_mask.transpose(0, 1).unsqueeze(-1),
                0.0).type_as(x)

        if self.last_state == 'last':
            encoder_hiddens = self.reshape_bidirectional_encoder_state(
                s[0][-2:, ::])
            encoder_cells = self.reshape_bidirectional_encoder_state(
                s[1][-2:, ::])
        elif self.last_state == 'avg':
            encoder_hiddens = x.sum(dim=0) / x.size(0)
            encoder_cells = self.reshape_bidirectional_encoder_state(
                s[1][-2:, ::])
        else:
            raise NotImplementedError()

        return {
            'encoder_out': (x, encoder_hiddens, encoder_cells),
            'encoder_padding_mask': encoder_padding_mask,  # B x T
        }
Example no. 9
    def forward(self, src_tokens, src_lengths):
        if self.args.left_pad_source:
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                left_to_right=True,
                padding_idx=self.dictionary.pad())
        x = self.embed_tokens(src_tokens)
        x = self.dropout(x)

        x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)
        _outputs, (final_hidden, _final_cell) = self.lstm(x)
        return {'final_hidden': final_hidden.squeeze(0)}
Example no. 10
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            src_tokens = utils.convert_padding_direction(src_tokens,
                                                         self.padding_idx,
                                                         left_to_right=True)

        # Fetch the batch size and the sequence length
        bsz, seqlen = src_tokens.size()

        # Embed the tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # Pack the embedded source tokens into a packed sequence instance
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # Finally, apply the rnn layers
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # Unpack the outputs
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)

        if self.bidirectional:

            def combine_bidirectional_output(outs):
                out = outs.view(self.num_layers, 2, bsz,
                                -1).transpose(1, 2).contiguous()
                return out.view(self.num_layers, bsz, -1)

            final_hiddens = combine_bidirectional_output(final_hiddens)
            final_cells = combine_bidirectional_output(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
Example no. 11
    def forward(self, src_tokens, src_lengths):
        if self.left_pad_source:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = Variable(x.data.new(*state_size).zero_())
        c0 = Variable(x.data.new(*state_size).zero_())
        packed_outs, (final_hiddens, final_cells) = self.lstm(
            packed_x,
            (h0, c0),
        )

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:
            bi_final_hiddens, bi_final_cells = [], []
            for i in range(self.num_layers):
                bi_final_hiddens.append(
                    torch.cat((final_hiddens[2 * i], final_hiddens[2 * i + 1]),
                              dim=0).view(bsz, self.output_units))
                bi_final_cells.append(
                    torch.cat((final_cells[2 * i], final_cells[2 * i + 1]),
                              dim=0).view(bsz, self.output_units))
            return x, bi_final_hiddens, bi_final_cells

        return x, final_hiddens, final_cells
Example no. 12
    def forward(self, src_tokens, src_lengths: Tensor):
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                torch.zeros_like(src_tokens).fill_(self.padding_idx),
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_idx * 1.0)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            final_hiddens = self.combine_bidir(final_hiddens, bsz)
            final_cells = self.combine_bidir(final_cells, bsz)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            (encoder_padding_mask, torch.empty(0), torch.empty(0)),
        }
Example no. 13
 def _make_sample(self, xs, ys):
     """Generates a sample object that Fairseq expects."""
     # add extra info to samples
     # TODO: should the right/left padding thing be in torch agent?
     repadded = convert_padding_direction(xs, self.dict.pad(), right_to_left=True)
     sample = {}
     sample["net_input"] = {
         "src_tokens": repadded,
         "src_lengths": self._seq_length(xs),
     }
     if ys is not None:
         sample["target"] = ys
         sample["ntokens"] = sum(self._seq_length(ys)).item()
         sample["net_input"]["prev_output_tokens"] = self._right_shifted_ys(ys)
     return sample
Example no. 14
    def forward(self, src_tokens, src_lengths):
        if LanguagePairDataset.LEFT_PAD_SOURCE:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                src_lengths,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)
        embed_dim = x.size(2)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        h0 = Variable(x.data.new(self.num_layers, bsz, embed_dim).zero_())
        c0 = Variable(x.data.new(self.num_layers, bsz, embed_dim).zero_())
        packed_outs, (final_hiddens, final_cells) = self.lstm(
            packed_x,
            (h0, c0),
        )

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(packed_outs, padding_value=0.)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, embed_dim]

        return x, final_hiddens, final_cells
Example no. 15
    def p_choose(
        self, query, key, key_padding_mask=None, attn_mask=None, incremental_state=None
    ):
        """
        query: bsz, tgt_len
        key: bsz, src_len
        key_padding_mask: bsz, src_len
        """
        src_len, bsz, _ = key.size()
        tgt_len, bsz, _ = query.size()
        p_choose = query.new_ones(bsz, tgt_len, src_len)
        p_choose = torch.tril(p_choose, diagonal=self.waitk_lagging - 1)
        p_choose = torch.triu(p_choose, diagonal=self.waitk_lagging - 1)

        if key_padding_mask is not None and key_padding_mask[:, 0].eq(1).any():
            # Left pad source
            # add -1 to the end
            p_choose = p_choose.masked_fill(
                key_padding_mask.float().flip(1).unsqueeze(1).bool(), -1
            )
            p_choose = convert_padding_direction(
                p_choose.view(-1, src_len).long(), padding_idx=-1, right_to_left=True
            )
            p_choose = p_choose.view(bsz, tgt_len, src_len).type_as(query)
            # remove -1
            p_choose[p_choose.eq(-1)] = 0

        # Extend to each head
        p_choose = (
            p_choose.contiguous()
            .unsqueeze(1)
            .expand(-1, self.num_heads, -1, -1)
            .contiguous()
            .view(-1, tgt_len, src_len)
        )

        return p_choose
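In `p_choose` above, applying `torch.tril` and then `torch.triu` with the same `diagonal` keeps exactly one diagonal band, which encodes the wait-k schedule: target step `i` puts its probability mass on source position `i + waitk_lagging - 1`. A toy sketch (hypothetical sizes, single head, no padding):

import torch

waitk_lagging = 3
tgt_len, src_len = 4, 6
band = torch.ones(tgt_len, src_len)
band = torch.tril(band, diagonal=waitk_lagging - 1)
band = torch.triu(band, diagonal=waitk_lagging - 1)
print(band)
# tensor([[0., 0., 1., 0., 0., 0.],
#         [0., 0., 0., 1., 0., 0.],
#         [0., 0., 0., 0., 1., 0.],
#         [0., 0., 0., 0., 0., 1.]])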
Example no. 16
    def forward(self, src_tokens, src_lengths):
        src_tokens1, src_tokens2 = src_tokens
        src_lengths1, src_lengths2 = src_lengths
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens1 = utils.convert_padding_direction(
                src_tokens1,
                self.padding_idx_1,
                left_to_right=True,
            )
            # src_tokens2 = utils.convert_padding_direction(
            #     src_tokens2,
            #     self.padding_idx_2,
            #     left_to_right=True,
            # )

        bsz1, seqlen1 = src_tokens1.size()
        # bsz2, seqlen2 = src_tokens2.size()

        # embed tokens
        x1 = self.embed_tokens_1(src_tokens1)
        x1 = F.dropout(x1, p=self.dropout_in, training=self.training)
        # x2 = self.embed_tokens_2(src_tokens2)
        # x2 = F.dropout(x2, p=self.dropout_in, training=self.training)
        fconv_dict = self.fconv2(src_tokens2, src_lengths2)
        x2 = fconv_dict["encoder_out"][0]

        # B x T x C -> T x B x C
        x1 = x1.transpose(0, 1)
        x2 = x2.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x1 = nn.utils.rnn.pack_padded_sequence(
            x1, src_lengths1.data.tolist())
        # packed_x2 = nn.utils.rnn.pack_padded_sequence(x2, src_lengths2.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size1 = 2 * self.num_layers, bsz1, self.hidden_size
            # state_size2 = 2 * self.num_layers, bsz2, self.hidden_size
        else:
            state_size1 = self.num_layers, bsz1, self.hidden_size
            # state_size2 = self.num_layers, bsz2, self.hidden_size
        h01 = x1.data.new(*state_size1).zero_()
        c01 = x1.data.new(*state_size1).zero_()
        packed_outs1, (final_hiddens1,
                       final_cells1) = self.lstm1(packed_x1, (h01, c01))
        # h02 = x2.data.new(*state_size2).zero_()
        # c02 = x2.data.new(*state_size2).zero_()
        # packed_outs2, (final_hiddens2, final_cells2) = self.lstm2(packed_x2, (h02, c02))

        # unpack outputs and apply dropout
        x1, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs1, padding_value=self.padding_value)
        x1 = F.dropout(x1, p=self.dropout_out, training=self.training)
        assert list(x1.size()) == [seqlen1, bsz1, self.output_units]
        # x2, _ = nn.utils.rnn.pad_packed_sequence(packed_outs2, padding_value=self.padding_value)
        # x2 = F.dropout(x2, p=self.dropout_out, training=self.training)
        # assert list(x2.size()) == [seqlen2, bsz2, self.output_units]

        if self.bidirectional:

            def combine_bidir_1(outs):
                return outs.view(self.num_layers, 2, bsz1,
                                 -1).transpose(1, 2).contiguous().view(
                                     self.num_layers, bsz1, -1)

            # def combine_bidir_2(outs):
            #     return outs.view(self.num_layers, 2, bsz2, -1).transpose(1, 2).contiguous().view(self.num_layers, bsz2, -1)

            final_hiddens_1 = combine_bidir_1(final_hiddens1)
            final_cells_1 = combine_bidir_1(final_cells1)
            # final_hiddens_2 = combine_bidir_2(final_hiddens2)
            # final_cells_2 = combine_bidir_2(final_cells2)

        encoder_padding_mask_1 = src_tokens1.eq(self.padding_idx_1).t()
        encoder_padding_mask_2 = src_tokens2.eq(self.padding_idx_2).t()
        x = torch.cat([x1, x2])
        encoder_padding_mask = torch.cat(
            [encoder_padding_mask_1, encoder_padding_mask_2])

        # HACK: pass hidden state of source 1 (title) to decoder
        return {
            'encoder_out': (x, final_hiddens_1, final_cells_1),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None,
            'segments': [x1.shape[0]]
        }
Example no. 17
    def forward(self, src_tokens, src_lengths):
        if self.left_pad and not self.sde:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )
        if self.sde:
            bsz = len(src_tokens)
        else:
            bsz, seqlen = src_tokens.size()
            encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)
        if self.sde:
            seqlen = x.size(0)
            encoder_padding_mask = []
            for s in src_tokens:
                encoder_padding_mask.append(
                    [0 for _ in range(len(s))] +
                    [1 for _ in range(seqlen - len(s))])
            encoder_padding_mask = torch.tensor(encoder_padding_mask,
                                                device=x.device).byte().t()

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        #assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                out = outs.view(self.num_layers, 2, bsz,
                                -1).transpose(1, 2).contiguous()
                return out.view(self.num_layers, bsz, -1)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
Example no. 18
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(src_tokens,
                                                         self.padding_idx,
                                                         left_to_right=True)
        if self.word_dropout_module is not None:
            src_tokens = self.word_dropout_module(src_tokens)
        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # Generate packed seq to deal with varying source seq length
        packed_input, batch_sizes = pack_padded_sequence(x, src_lengths)
        final_hiddens, final_cells = [], []
        next_hiddens = []
        for i, rnn_layer in enumerate(self.layers):
            current_hidden_size = (self.hidden_dim //
                                   2 if rnn_layer.is_bidirectional else
                                   self.hidden_dim)

            if self.cell_type in ["lstm", "milstm", "layer_norm_lstm"]:
                prev_hidden = (
                    x.new(bsz, current_hidden_size).zero_(),
                    x.new(bsz, current_hidden_size).zero_(),
                )
            else:
                raise Exception(f"{self.cell_type} not implemented")

            hidden, current_output = rnn_layer.forward(packed_input,
                                                       prev_hidden,
                                                       batch_sizes)
            next_hiddens.append(hidden)
            prev_hidden = next_hiddens[-1]

            if self.dropout_out != 0:
                current_output = F.dropout(current_output,
                                           p=self.dropout_out,
                                           training=self.training)

            if self.residual_level is not None and i >= self.residual_level:
                packed_input = packed_input.clone() + current_output
            else:
                packed_input = current_output

        final_hiddens, final_cells = zip(*next_hiddens)
        # Reshape to [num_layer, batch_size, hidden_dim]
        final_hiddens = torch.cat(final_hiddens,
                                  dim=0).view(self.num_layers,
                                              *final_hiddens[0].size())
        final_cells = torch.cat(final_cells,
                                dim=0).view(self.num_layers,
                                            *final_cells[0].size())

        #  [max_seqlen, batch_size, hidden_dim]
        unpacked_output, _ = pad_packed_sequence(
            PackedSequence(packed_input, batch_sizes),
            padding_value=self.padding_value)

        return (unpacked_output, final_hiddens, final_cells, src_lengths,
                src_tokens)
Example no. 19
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(src_tokens,
                                                         self.padding_idx,
                                                         left_to_right=True)

        # If we're generating adversarial examples we need to keep track of
        # some internal variables
        self.tracker.reset()

        if self.word_dropout_module is not None:
            src_tokens = self.word_dropout_module(src_tokens)

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        # Track token embeddings
        self.tracker.track(x,
                           "token_embeddings",
                           retain_grad=self.track_gradients)

        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # Allows compatibility with Caffe2 inputs for tracing (int32)
        # as well as the current format of Fairseq-Py inputs (int64)
        if src_lengths.dtype is torch.int64:
            src_lengths = src_lengths.int()

        # Generate packed seq to deal with varying source seq length
        # packed_input is of type PackedSequence, which consists of:
        # element [0]: a tensor, the packed data, and
        # element [1]: a list of integers, the batch size for each step
        packed_input = pack_padded_sequence(x, src_lengths)

        final_hiddens, final_cells = [], []
        for i, rnn_layer in enumerate(self.layers):
            if self.bidirectional and i == 0:
                h0 = x.new(2, bsz, self.hidden_dim // 2).zero_()
                c0 = x.new(2, bsz, self.hidden_dim // 2).zero_()
            else:
                h0 = x.new(1, bsz, self.hidden_dim).zero_()
                c0 = x.new(1, bsz, self.hidden_dim).zero_()

            # apply LSTM along entire sequence
            current_output, (h_last,
                             c_last) = rnn_layer(packed_input, (h0, c0))

            # final state shapes: (bsz, hidden_dim)
            if self.bidirectional and i == 0:
                # concatenate last states for forward and backward LSTM
                h_last = torch.cat((h_last[0, :, :], h_last[1, :, :]), dim=1)
                c_last = torch.cat((c_last[0, :, :], c_last[1, :, :]), dim=1)
            else:
                h_last = h_last.squeeze(dim=0)
                c_last = c_last.squeeze(dim=0)

            final_hiddens.append(h_last)
            final_cells.append(c_last)

            if self.residual_level is not None and i >= self.residual_level:
                packed_input[0] = packed_input.clone()[0] + current_output[0]
            else:
                packed_input = current_output

        # Reshape to [num_layer, batch_size, hidden_dim]
        final_hiddens = torch.cat(final_hiddens,
                                  dim=0).view(self.num_layers,
                                              *final_hiddens[0].size())
        final_cells = torch.cat(final_cells,
                                dim=0).view(self.num_layers,
                                            *final_cells[0].size())

        #  [max_seqlen, batch_size, hidden_dim]
        unpacked_output, _ = pad_packed_sequence(
            packed_input, padding_value=self.padding_value)

        return (unpacked_output, final_hiddens, final_cells, src_lengths,
                src_tokens)
Example no. 20
    def forward(self, src_tokens, src_lengths: Tensor):
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                torch.zeros_like(src_tokens).fill_(self.padding_idx),
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # MY MODIFICATIONS
        x = []
        lengths = []
        toks = []
        for line in src_tokens:
            records = []
            words = self.dictionary.string(line).split()
            couples = list(map(' '.join, zip(words[0::2], words[1::2])))
            for couple in couples:
                encoded_feature = self.dictionary.encode_line(couple).tolist()
                records.append(encoded_feature[0:-1])
            lengths.append(len(records))
            toks.append([0] * len(records))
            x.append(records)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        x = torch.tensor(x, dtype=torch.float32, device=torch.device(device))
        src_tokens = torch.tensor(toks,
                                  dtype=torch.int32,
                                  device=torch.device(device))
        src_lengths = torch.tensor(lengths,
                                   dtype=torch.int32,
                                   device=torch.device(device))

        seqlen = src_lengths[0]

        # END OF MY MODIFICATIONS

        # embed tokens
        #x = self.embed_tokens(src_tokens)

        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_idx * 1.0)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:
            final_hiddens = self.combine_bidir(final_hiddens, bsz)
            final_cells = self.combine_bidir(final_cells, bsz)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return tuple((
            x,  # seq_len x batch x hidden
            final_hiddens,  # num_layers x batch x num_directions*hidden
            final_cells,  # num_layers x batch x num_directions*hidden
            encoder_padding_mask,  # seq_len x batch
        ))
Example no. 21
    def forward(self, src_tokens=None, src_lengths=None, token_embeds=None):
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        if token_embeds is None:
            bsz, seqlen = src_tokens.size()
            # embed tokens
            x = self.embed_tokens(src_tokens)
        else:

            x = token_embeds
            bsz, seqlen, embed_dim = token_embeds.shape
            assert embed_dim == self.embed_dim

        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence

        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.cpu(),
                                                     enforce_sorted=False)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                out = (outs.view(self.num_layers, 2, bsz,
                                 -1).transpose(1, 2).contiguous())
                return out.view(self.num_layers, bsz, -1)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        if src_tokens is not None:
            encoder_padding_mask = src_tokens.eq(self.padding_idx).t()
        else:
            encoder_padding_mask = None

        return {
            "encoder_out": (x, final_hiddens, final_cells),
            "encoder_padding_mask":
            (encoder_padding_mask if encoder_padding_mask is not None
             and encoder_padding_mask.any() else None),
        }
Example no. 22
    def forward(
        self,
        src_tokens: Tensor,
        src_lengths: Tensor,
        enforce_sorted: bool = False,
    ):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of
                shape `(batch, src_len)`
            src_lengths (LongTensor): lengths of each source sentence of
                shape `(batch)`
            enforce_sorted (bool, optional): if True, `src_tokens` is
                expected to contain sequences sorted by length in a
                decreasing order. If False, this condition is not
                required. Default: False.
        """
        if self.left_pad_source:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                torch.zeros_like(src_tokens).fill_(self.padding_idx),
                left_to_right=True,
            )
        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = self.dropout_in_module(x)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(
            x, src_lengths.cpu(), enforce_sorted=enforce_sorted,
            batch_first=True
        )

        packed_outs, hidden = self.hidden(packed_x)

        outputs, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_idx * 1.0,
            batch_first=True
        )

        if self.bidirectional:
            fwd_final, bwd_final = outputs.view(bsz, max(src_lengths), self.hidden_size, 2).permute(3, 0, 1, 2)
            outputs = torch.cat((fwd_final.unsqueeze(-1), bwd_final.unsqueeze(-1)), -1)
            outputs = self.bidir_dense(outputs).squeeze(-1)

        outputs = self.dropout_out_module(outputs)

        if self.rnn_type == "lstm":
            final_hiddens = self.reshape_state(hidden[0], bsz)
            final_cells = self.reshape_state(hidden[1], bsz)
        else:
            final_hiddens, final_cells = self.reshape_state(hidden, bsz), None

        assert list(outputs.size()) == [bsz, seqlen, self.output_units]

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return tuple(
            (
                outputs,  # batch x seq_len x hidden
                final_hiddens,  # num_layers x batch x num_directions*hidden
                final_cells,  # num_layers x batch x num_directions*hidden
                encoder_padding_mask,  # seq_len x batch
            )
        )
Example no. 23
    def forward(self, src_tokens, src_lengths):
        if LanguagePairDataset.LEFT_PAD_SOURCE:
            # convert left-padding to right-padding
            src_tokens.data = utils.convert_padding_direction(
                src_tokens.data,
                src_lengths.data,
                self.padding_idx,
                left_to_right=True)
        if self.word_dropout_module is not None:
            src_tokens.data = self.word_dropout_module(src_tokens.data)

        if self.char_rnn_params is not None:
            # x.shape: (max_num_words, batch_size, word_dim)
            x, src_lengths = self.char_rnn_encoder(src_tokens, src_lengths)
            seqlen, bsz, _ = x.size()

            # temporarily sort in descending word-length order
            src_lengths, word_len_order = torch.sort(src_lengths,
                                                     descending=True)
            x = x[:, word_len_order, :]
            _, inverted_word_len_order = torch.sort(word_len_order)
        else:
            bsz, seqlen = src_tokens.size()

            # embed tokens
            x = self.embed_tokens(src_tokens)
            x = F.dropout(x, p=self.dropout_in, training=self.training)

            # B x T x C -> T x B x C
            x = x.transpose(0, 1)

        # Allows compatibility with Caffe2 inputs for tracing (int32)
        # as well as the current format of Fairseq-Py inputs (int64)
        if src_lengths.dtype is torch.int64:
            src_lengths = src_lengths.int()

        # Generate packed seq to deal with varying source seq length
        # packed_input is of type PackedSequence, which consists of:
        # element [0]: a tensor, the packed data, and
        # element [1]: a list of integers, the batch size for each step
        packed_input = pack_padded_sequence(x, src_lengths)

        final_hiddens, final_cells = [], []
        for i, rnn_layer in enumerate(self.layers):
            if self.bidirectional and i == 0:
                h0 = x.data.new(2, bsz, self.hidden_dim // 2).zero_()
                c0 = x.data.new(2, bsz, self.hidden_dim // 2).zero_()
            else:
                h0 = x.data.new(1, bsz, self.hidden_dim).zero_()
                c0 = x.data.new(1, bsz, self.hidden_dim).zero_()

            # apply LSTM along entire sequence
            current_output, (h_last,
                             c_last) = rnn_layer(packed_input, (h0, c0))

            # final state shapes: (bsz, hidden_dim)
            if self.bidirectional and i == 0:
                # concatenate last states for forward and backward LSTM
                h_last = torch.cat((h_last[0, :, :], h_last[1, :, :]), dim=1)
                c_last = torch.cat((c_last[0, :, :], c_last[1, :, :]), dim=1)
            else:
                h_last = h_last.squeeze(dim=0)
                c_last = c_last.squeeze(dim=0)

            final_hiddens.append(h_last)
            final_cells.append(c_last)

            if self.residual_level is not None and i >= self.residual_level:
                packed_input[0] = packed_input.clone()[0] + current_output[0]
            else:
                packed_input = current_output

        # Reshape to [num_layer, batch_size, hidden_dim]
        final_hiddens = torch.cat(final_hiddens,
                                  dim=0).view(self.num_layers,
                                              *final_hiddens[0].size())
        final_cells = torch.cat(final_cells,
                                dim=0).view(self.num_layers,
                                            *final_cells[0].size())

        #  [max_seqlen, batch_size, hidden_dim]
        padding_value = -np.inf if self.add_encoder_output_as_decoder_input else 0
        unpacked_output, _ = pad_packed_sequence(packed_input,
                                                 padding_value=padding_value)

        if self.char_rnn_params is not None:
            unpacked_output = unpacked_output[:, inverted_word_len_order, :]
            final_hiddens = final_hiddens[:, inverted_word_len_order, :]
            final_cells = final_cells[:, inverted_word_len_order, :]
            src_lengths = src_lengths[inverted_word_len_order]
            src_tokens = src_tokens[inverted_word_len_order, :]

        return (unpacked_output, final_hiddens, final_cells, src_lengths,
                src_tokens)
Example no. 24
    def forward(self,
                tokens,
                lengths=None,
                precomputed_embedded=None,
                **kwargs):

        bsz, seqlen = tokens.size()

        if self.left_pad:
            # convert left-padding to right-padding
            tokens = utils.convert_padding_direction(
                tokens,
                self.padding_idx,
                left_to_right=True,
            )
        if lengths is None:
            lengths = (tokens != self.padding_idx).sum(1)

        if precomputed_embedded is None:
            x = self.embed_tokens(tokens)
        else:
            x = precomputed_embedded
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        if self.fc_in:
            x = self.fc_in(x)

        # sorting sequences by len otherwise pack_padded_sequence will complain
        lengths_sorted, perm_index = lengths.sort(0, descending=True)
        if (lengths_sorted != lengths).sum():
            needs_perm = True
            x = x[perm_index]
            lengths = lengths_sorted
        else:
            needs_perm = False

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        packed_x = torch.nn.utils.rnn.pack_padded_sequence(
            x, lengths.data.tolist())
        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.embed_dim
        else:
            state_size = self.num_layers, bsz, self.embed_dim
        h0 = x.data.new(*state_size).zero_()
        c0 = x.data.new(*state_size).zero_()
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = torch.nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)

        x = F.dropout(x, p=self.dropout_out, training=self.training)
        #assert list(x.size()) == [seqlen, bsz, self.output_units]

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        # restoring original order
        if needs_perm:
            odx = perm_index.view(-1, 1).unsqueeze(1).expand_as(x)
            x = x.gather(0, odx)

        if self.bidirectional:

            def combine_bidir(outs):
                return outs.view(self.num_layers, 2, bsz,
                                 -1).transpose(1, 2).contiguous().view(
                                     self.num_layers, bsz, -1)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

            x = x.view(x.size(0), x.size(1), 2, -1).sum(2)

        if self.fc_out1 is not None:
            x = self.fc_out1(x)

        if self.adaptive_softmax is None and self.fc_out2 is not None:
            x = self.fc_out2(x)

        return x, {'hidden_states': (final_hiddens, final_cells)}
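The encoder above sorts the batch by length before packing and later restores the original order. A common way to express the restore step is with the inverse permutation (`argsort` of the sort indices); a minimal sketch under assumed shapes:

import torch

lengths = torch.tensor([2, 5, 3])
x = torch.arange(3).unsqueeze(1).float().expand(3, 4)   # (B, T) toy features

sorted_lengths, perm = lengths.sort(0, descending=True)
x_sorted = x[perm]                       # reorder the batch for packing

# ... pack, run the RNN, unpack on x_sorted ...

inv_perm = perm.argsort()                # inverse permutation
x_restored = x_sorted[inv_perm]          # back to the original batch order
assert torch.equal(x_restored, x)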
Example no. 25
    def forward(
        self,
        src_tokens: Tensor,
        src_lengths: Tensor,
        enforce_sorted: bool = True,
    ):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of
                shape `(batch, src_len)`
            src_lengths (LongTensor): lengths of each source sentence of
                shape `(batch)`
            enforce_sorted (bool, optional): if True, `src_tokens` is
                expected to contain sequences sorted by length in a
                decreasing order. If False, this condition is not
                required. Default: True.
        """
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                torch.zeros_like(src_tokens).fill_(self.padding_idx),
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(
            x, src_lengths.data, enforce_sorted=enforce_sorted)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_idx * 1.0)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:
            final_hiddens = self.combine_bidir(final_hiddens, bsz)
            final_cells = self.combine_bidir(final_cells, bsz)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return tuple((
            x,  # seq_len x batch x hidden
            final_hiddens,  # num_layers x batch x num_directions*hidden
            final_cells,  # num_layers x batch x num_directions*hidden
            encoder_padding_mask,  # seq_len x batch
        ))
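The `enforce_sorted` flag documented above is passed straight to `nn.utils.rnn.pack_padded_sequence`: with `enforce_sorted=True` the batch must already be sorted by decreasing length, while `enforce_sorted=False` lets PyTorch sort internally and undo the sort when unpacking. A minimal sketch with toy shapes:

import torch
import torch.nn as nn

x = torch.randn(5, 3, 2)                 # T x B x C, right-padded
lengths = torch.tensor([3, 5, 4])        # not sorted in decreasing order

# With enforce_sorted=True this would raise; with False, PyTorch sorts the
# batch internally and records the permutation needed to undo it.
packed = nn.utils.rnn.pack_padded_sequence(x, lengths, enforce_sorted=False)
out, out_lengths = nn.utils.rnn.pad_packed_sequence(packed)
assert out_lengths.tolist() == [3, 5, 4]  # original batch order is preserved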
Example no. 26
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        # bert embedding
        segments_tensors = torch.zeros_like(src_tokens).to(src_tokens.device)
        # self.bert.eval()
        # with torch.no_grad():
        encoded_layers, _ = self.bert(
            src_tokens, segments_tensors)  # (bsz, length, dimension)
        x = torch.cat((x, encoded_layers[self.layer]), 2)

        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                out = outs.view(self.num_layers, 2, bsz,
                                -1).transpose(1, 2).contiguous()
                return out.view(self.num_layers, bsz, -1)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return {
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
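
All of the encoders on this page rely on the same pack -> LSTM -> unpack pattern. A self-contained sketch of that pattern (all sizes are illustrative): inputs are time-major (T x B x C), right-padded, and, when `enforce_sorted` is left at its default of True, sorted by decreasing length.

import torch
import torch.nn as nn

bsz, seqlen, dim, hidden = 3, 5, 8, 16
lengths = torch.tensor([5, 4, 2])          # decreasing order (enforce_sorted=True)
x = torch.randn(seqlen, bsz, dim)          # T x B x C, right-padded
lstm = nn.LSTM(dim, hidden)

packed = nn.utils.rnn.pack_padded_sequence(x, lengths)
packed_out, (h_n, c_n) = lstm(packed)
out, out_lengths = nn.utils.rnn.pad_packed_sequence(packed_out, padding_value=0.0)
assert list(out.size()) == [seqlen, bsz, hidden]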
Example no. 27
0
    def forward(self,
                src_tokens,
                src_lengths,
                word_tokens=None,
                bert_repre=None,
                **kwargs):

        words = None
        chars = None
        if word_tokens is not None and self.embed_words is not None:
            words = self.embed_words(word_tokens).squeeze().detach()
            if self.char_embed is not None:
                chars = self.char_embed(word_tokens).squeeze()

        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(src_tokens,
                                                         self.padding_idx,
                                                         left_to_right=True)

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist(),
                                                     enforce_sorted=False)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        if self.rnn_type == "lstm":
            c0 = x.new_zeros(*state_size)
            packed_outs, _ = self.rnn(packed_x, (h0, c0))
        else:
            packed_outs, _ = self.rnn(packed_x, h0)

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        if word_tokens is not None:
            encoder_summary = self.summary_network(x, words,
                                                   encoder_padding_mask)
        else:
            encoder_summary = None
        return {
            "encoder_out": (x, ),
            "encoder_padding_mask":
            encoder_padding_mask if encoder_padding_mask.any() else None,
            "encoder_summary":
            encoder_summary,
            "words":
            words,
            "chars":
            chars,
            'bert_repre':
            bert_repre
        }
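
The `rnn_type` branch above reflects a PyTorch API difference: nn.LSTM takes (and returns) a (h0, c0) tuple, while nn.GRU takes only h0. A minimal standalone illustration (sizes are arbitrary):

import torch
import torch.nn as nn

dim, hidden, num_layers, bsz, seqlen = 8, 16, 2, 3, 5
x = torch.randn(seqlen, bsz, dim)               # T x B x C

lstm = nn.LSTM(dim, hidden, num_layers=num_layers)
gru = nn.GRU(dim, hidden, num_layers=num_layers)

h0 = x.new_zeros(num_layers, bsz, hidden)
c0 = x.new_zeros(num_layers, bsz, hidden)

lstm_out, (h_n, c_n) = lstm(x, (h0, c0))        # LSTM needs hidden and cell state
gru_out, h_n = gru(x, h0)                       # GRU needs only the hidden state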
Example no. 28
0
    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x,
                                                     src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.data.new(*state_size).zero_()
        c0 = x.data.new(*state_size).zero_()
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value)
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                return torch.cat([
                    torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(
                        1, bsz, self.output_units)
                    for i in range(self.num_layers)
                ],
                                 dim=0)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        # Set padded outputs to -inf so they are not selected by max-pooling
        padding_mask = encoder_padding_mask.unsqueeze(-1)
        if padding_mask.any():
            x = x.float().masked_fill_(padding_mask, float('-inf')).type_as(x)

        # Build the sentence embedding by max-pooling over the encoder outputs
        # TODO: self-attention could be tried here instead of max-pooling
        sentemb = x.max(dim=0)[0]

        return {
            'sentemb':
            sentemb,
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask':
            encoder_padding_mask if encoder_padding_mask.any() else None
        }
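
The sentence embedding above is built by masked max-pooling: padded time steps are filled with -inf so they can never win the max. A self-contained sketch of just that step (token values and sizes are made up):

import torch

seqlen, bsz, dim, pad_idx = 4, 2, 3, 1
x = torch.randn(seqlen, bsz, dim)                          # T x B x C encoder outputs
src_tokens = torch.tensor([[5, 6, 7, 1],                   # B x T, right-padded
                           [8, 9, 1, 1]])

padding_mask = src_tokens.eq(pad_idx).t().unsqueeze(-1)    # T x B x 1
x = x.float().masked_fill_(padding_mask, float('-inf')).type_as(x)

sentemb = x.max(dim=0)[0]                                  # B x C
assert list(sentemb.size()) == [bsz, dim]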
Example no. 29
0
    def forward(
        self,
        src_tokens: Tensor,
        src_lengths: Tensor,
        enforce_sorted: bool = True,
    ):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of
                shape `(batch, src_len)`
            src_lengths (LongTensor): lengths of each source sentence of
                shape `(batch)`
            enforce_sorted (bool, optional): if True, `src_tokens` is
                expected to contain sequences sorted by length in a
                decreasing order. If False, this condition is not
                required. Default: True.
        """
        if self.left_pad:
            # nn.utils.rnn.pack_padded_sequence requires right-padding;
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                torch.zeros_like(src_tokens).fill_(self.padding_idx),
                left_to_right=True,
            )
        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Add the encoded synset information to the token embeddings.
        # Step 5: iterate over the words, get each word's list of synsets, and
        # by default pick the first synset (could be improved).
        # Step 6: map each word's synset_id to its corresponding index.
        self.to(device)
        src_emb = []
        # document: https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk
        for sentence in src_tokens:
            s = [self.dictionary[idx] for idx in sentence]
            s_pos = nltk.pos_tag(s)
            wrd_pos = [
                self.wnl.lemmatize(w) + '\t' + map_treebankTags_to_wn(pos)
                for w, pos in s_pos
            ]
            emb_sentence = []
            for w in wrd_pos:
                pos = w.split('\t')[1]
                if pos != 'None':
                    try:
                        # take the first synset id and map it to a synset_name
                        synset_name = self.word_synset[w][0][1]
                        # map the synset_name to its cluster id
                        cluster_name = self.synset_to_clusterID_per_pos[pos][
                            synset_name]
                    except (KeyError, IndexError):
                        cluster_name = 'None'
                    cluster_id = self.cluster2idx_per_pos[pos][cluster_name]
                    cluster_id = torch.tensor(cluster_id).to(device)
                    emb_sentence.append(
                        self.embed_cluster_per_pos[pos](cluster_id))
                else:
                    cluster_id = len(self.cluster2idx_per_pos['n']) - 1
                    cluster_id = torch.tensor(cluster_id).to(device)
                    emb_sentence.append(
                        self.embed_cluster_per_pos['n'](cluster_id))

            src_emb.append(torch.stack(emb_sentence))

        # Step 7: look up each synset's embedding from the table built above.
        x_emb = torch.stack(src_emb).to(device)
        # Step 8: concatenate this embedding matrix onto x along the feature dimension.
        x = torch.cat((x, x_emb), 2)
        x = self.dropout_in_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(
            x, src_lengths.cpu(), enforce_sorted=enforce_sorted)

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.new_zeros(*state_size)
        c0 = x.new_zeros(*state_size)
        packed_outs, (final_hiddens,
                      final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_idx * 1.0)
        x = self.dropout_out_module(x)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:
            final_hiddens = self.combine_bidir(final_hiddens, bsz)
            final_cells = self.combine_bidir(final_cells, bsz)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        return tuple((
            x,  # seq_len x batch x hidden
            final_hiddens,  # num_layers x batch x num_directions*hidden
            final_cells,  # num_layers x batch x num_directions*hidden
            encoder_padding_mask,  # seq_len x batch
        ))
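
Step 8 above concatenates a second, per-token embedding (the cluster/synset embedding) onto the ordinary token embedding before the LSTM. Stripped of the WordNet lookup, the core of that idea is just two embedding tables and a torch.cat along the feature dimension; all names and sizes below are illustrative:

import torch
import torch.nn as nn

vocab_size, num_clusters = 100, 20
tok_dim, cluster_dim = 32, 8
bsz, seqlen = 2, 5

embed_tokens = nn.Embedding(vocab_size, tok_dim, padding_idx=1)
embed_cluster = nn.Embedding(num_clusters, cluster_dim)

src_tokens = torch.randint(2, vocab_size, (bsz, seqlen))
cluster_ids = torch.randint(0, num_clusters, (bsz, seqlen))   # e.g. from a synset lookup

x = torch.cat((embed_tokens(src_tokens), embed_cluster(cluster_ids)), dim=2)
assert list(x.size()) == [bsz, seqlen, tok_dim + cluster_dim]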
Example no. 30
0
    def forward(self, src_tokens, src_lengths):
        if LanguagePairDataset.LEFT_PAD_SOURCE:
            # convert left-padding to right-padding
            src_tokens.data = utils.convert_padding_direction(
                src_tokens.data,
                src_lengths.data,
                self.padding_idx,
                left_to_right=True,
            )
        if self.word_dropout_module is not None:
            src_tokens.data = self.word_dropout_module(src_tokens.data)
        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # Generate packed seq to deal with varying source seq length
        packed_input, batch_sizes = pack_padded_sequence(
            x,
            src_lengths,
        )
        final_hiddens, final_cells = [], []
        next_hiddens = []
        for i, rnn_layer in enumerate(self.layers):
            current_hidden_size = self.hidden_dim // 2 if \
                rnn_layer.is_bidirectional else self.hidden_dim
            if self.cell_type in ['lstm', 'milstm', 'layer_norm_lstm']:
                prev_hidden = (
                    x.data.new(bsz, current_hidden_size).zero_(),
                    x.data.new(bsz, current_hidden_size).zero_(),
                )
            else:
                raise Exception('{} not implemented'.format(self.cell_type))

            hidden, current_output = rnn_layer.forward(
                packed_input,
                prev_hidden,
                batch_sizes,
            )
            next_hiddens.append(hidden)
            prev_hidden = next_hiddens[-1]

            if self.dropout_out != 0:
                current_output = F.dropout(
                    current_output,
                    p=self.dropout_out,
                    training=self.training,
                )

            if self.residual_level is not None and i >= self.residual_level:
                packed_input = packed_input.clone() + current_output
            else:
                packed_input = current_output

        final_hiddens, final_cells = zip(*next_hiddens)
        # Reshape to [num_layer, batch_size, hidden_dim]
        final_hiddens = torch.cat(
            final_hiddens,
            dim=0,
        ).view(self.num_layers, *final_hiddens[0].size())
        final_cells = torch.cat(
            final_cells,
            dim=0,
        ).view(self.num_layers, *final_cells[0].size())

        #  [max_seqlen, batch_size, hidden_dim]
        padding_value = -np.inf if self.add_encoder_output_as_decoder_input else 0
        unpacked_output, _ = pad_packed_sequence(
            PackedSequence(packed_input, batch_sizes),
            padding_value=padding_value,
        )

        return (
            unpacked_output,
            final_hiddens,
            final_cells,
            src_lengths,
            src_tokens,
        )
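
The per-layer loop above adds residual connections from residual_level onward: each layer's input is carried forward and summed with that layer's output. A simplified, self-contained version using plain (unpacked) tensors, which keeps only the residual logic and drops the packed-sequence handling:

import torch
import torch.nn as nn

num_layers, residual_level = 3, 1
dim, bsz, seqlen = 16, 2, 5
layers = nn.ModuleList([nn.LSTM(dim, dim) for _ in range(num_layers)])

x = torch.randn(seqlen, bsz, dim)                # T x B x C
inp = x
for i, layer in enumerate(layers):
    out, _ = layer(inp)
    if residual_level is not None and i >= residual_level:
        inp = inp + out                          # residual connection
    else:
        inp = out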