Exemple #1
0
 def test_cat(self):
     """Check maybe_cat along dim=1, with and without None entries."""
     left = torch.IntTensor([[1, 2, 3], [4, 5, 6]])
     right = torch.IntTensor([[11, 12, 13], [14, 15, 16]])
     joined = torch.IntTensor(
         [[1, 2, 3, 11, 12, 13], [4, 5, 6, 14, 15, 16]]
     )
     # (inputs, expected result) pairs, checked in this order.
     cases = (
         ([left, right], joined),
         ([left, None, right, None, None], joined),
         # A single non-None tensor is expected to come back unchanged.
         ([None, None, left, None], left),
     )
     for tensors, expected in cases:
         actual = pytorch_utils.maybe_cat(tensors, dim=1)
         npt.assert_array_equal(actual, expected)
Exemple #2
0
    def forward_unprojected(self,
                            input_tokens,
                            encoder_out,
                            incremental_state=None):
        """Compute decoder features and attention scores for ``input_tokens``.

        The tokens are left-padded with ``history_len - 1`` copies of
        ``self.dst_dict.eos()``, embedded, passed through ``history_conv``
        and the stacked ``layers``, and finally attended over the encoder
        outputs.

        Args:
            input_tokens: 2-D tensor of token indices (it is padded along its
                last dimension and the padded size unpacks to two values).
            encoder_out: 5-tuple; only the first item (encoder outputs) and
                the fourth item (source lengths) are consumed here.
            incremental_state: When not ``None``, only the trailing
                ``history_len`` padded tokens are processed. It is always
                handed to ``utils.set_incremental_state`` under the key
                ``"incremental_marker"``.

        Returns:
            Tuple ``(features, attn_scores)``: the layer output joined with
            the attention output by ``maybe_cat`` along dim 2 (and passed
            through ``additional_fc`` when that attribute exists), plus the
            reshaped attention scores.
        """
        history_pad = self.history_len - 1
        window = F.pad(
            input_tokens,
            (history_pad, 0, 0, 0),
            "constant",
            self.dst_dict.eos(),
        )
        # incremental_state is only consulted to tell decoding apart from a
        # full forward pass; self.training is False during validation too,
        # so it cannot serve that purpose.
        decoding = incremental_state is not None
        if decoding:
            window = window[:, -self.history_len:]
        utils.set_incremental_state(
            self, incremental_state, "incremental_marker", True
        )

        batch_size, padded_len = window.size()
        # Number of real (non-history) positions in the window.
        num_steps = padded_len - history_pad

        # Pull what we need out of the encoder result.
        encoder_outs, _final_hidden, _, src_lengths, _ = encoder_out

        # window has shape [batch_size, seq_len+history_len]
        embedded = self.embed_tokens(window)
        embedded = F.dropout(
            embedded, p=self.dropout_in, training=self.training
        )

        # The convolution wants [batch_size, channels, seq_len], so swap the
        # last two axes going in and swap them back coming out.
        conv_out = self.history_conv(embedded.transpose(1, 2))
        features = F.dropout(
            conv_out.transpose(1, 2),
            p=self.dropout_out,
            training=self.training,
        )

        # features has shape [batch_size, seq_len, channels]
        for depth, layer in enumerate(self.layers):
            layer_in = features
            layer_out = layer(layer_in)
            features = F.dropout(
                layer_out, p=self.dropout_out, training=self.training
            )
            if self.residual_level is not None and depth >= self.residual_level:
                features = features + layer_in

        # Attention over the encoder outputs: flatten the step-major
        # features into rows and tile the encoder side once per step.
        flat_queries = (
            features.transpose(0, 1).contiguous().view(-1, self.hidden_dim)
        )
        tiled_encoder_outs = encoder_outs.repeat(1, num_steps, 1)
        tiled_src_lengths = src_lengths.repeat(num_steps)
        attn_out, attn_scores = self.attention(
            flat_queries, tiled_encoder_outs, tiled_src_lengths
        )
        if attn_out is not None:
            attn_out = attn_out.view(num_steps, batch_size, -1).transpose(1, 0)
        attn_scores = attn_scores.view(-1, num_steps, batch_size).transpose(0, 2)
        features = maybe_cat((features, attn_out), dim=2)

        # Optional bottleneck layer.
        if hasattr(self, "additional_fc"):
            bottleneck = self.additional_fc(features)
            features = F.dropout(
                bottleneck, p=self.dropout_out, training=self.training
            )
        return features, attn_scores
Exemple #3
0
    def forward_unprojected(self,
                            input_tokens,
                            encoder_out,
                            incremental_state=None):
        """Run the recurrent decoder step by step with input feeding.

        For every time step the token embedding is joined (via
        ``maybe_cat``) with the previous attention output, pushed through
        each cell in ``self.layers``, and the last layer's hidden state is
        attended over the encoder outputs.

        Args:
            input_tokens: 2-D tensor of token indices (its size unpacks to
                ``bsz, seqlen``).
            encoder_out: 5-tuple ``(encoder_outs, final_hidden, final_cell,
                src_lengths, src_tokens)``. Only ``encoder_outs`` and
                ``src_lengths`` are used directly here; the whole tuple is
                also passed to ``self._init_prev_states``.
            incremental_state: When not ``None``, only the last column of
                ``input_tokens`` is processed, and recurrent states are read
                from / written to it under the key ``"cached_state"``.

        Returns:
            Tuple ``(x, attn_scores)``: the per-step outputs stacked and
            transposed to batch-first (and passed through ``additional_fc``
            when that attribute exists), plus the attention scores of all
            steps concatenated along dim 1 and transposed on dims 0 and 2.
        """
        if incremental_state is not None:
            input_tokens = input_tokens[:, -1:]
        bsz, seqlen = input_tokens.size()

        # get outputs from encoder
        (encoder_outs, final_hidden, final_cell, src_lengths,
         src_tokens) = encoder_out

        # embed tokens
        x = self.embed_tokens(input_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)
        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # initialize previous states (or get from cache during incremental generation)
        cached_state = utils.get_incremental_state(self, incremental_state,
                                                   "cached_state")
        input_feed = None
        if cached_state is not None:
            prev_hiddens, prev_cells, input_feed = cached_state
        else:
            # first time step, initialize previous states
            prev_hiddens, prev_cells = self._init_prev_states(encoder_out)
            if self.attention.context_dim:
                input_feed = self.initial_attn_context.expand(
                    bsz, self.attention.context_dim)

        attn_scores_per_step = []
        outs = []
        for j in range(seqlen):
            # input feeding: concatenate context vector from previous time step
            step_input = maybe_cat((x[j, :, :], input_feed), dim=1)
            previous_layer_input = step_input
            for i, rnn in enumerate(self.layers):
                # recurrent cell
                hidden, cell = rnn(step_input,
                                   (prev_hiddens[i], prev_cells[i]))

                # hidden state becomes the input to the next layer
                layer_output = F.dropout(hidden,
                                         p=self.dropout_out,
                                         training=self.training)

                if self.residual_level is not None and i >= self.residual_level:
                    # TODO add an assert related to sizes here
                    step_input = layer_output + previous_layer_input
                else:
                    step_input = layer_output
                previous_layer_input = step_input

                # save state for next time step
                prev_hiddens[i] = hidden
                prev_cells[i] = cell

            # Attend with the last layer's raw (pre-dropout) hidden state.
            out, step_attn_scores = self.attention(hidden, encoder_outs,
                                                   src_lengths)
            input_feed = out
            attn_scores_per_step.append(step_attn_scores.unsqueeze(1))
            combined_output_and_context = maybe_cat((hidden, out), dim=1)

            # save final output
            outs.append(combined_output_and_context)

        # cache previous states (no-op except during incremental generation)
        utils.set_incremental_state(
            self,
            incremental_state,
            "cached_state",
            (prev_hiddens, prev_cells, input_feed),
        )

        # collect outputs across time steps
        x = torch.cat(outs, dim=0).view(seqlen, bsz,
                                        self.combined_output_and_context_dim)

        # Join the per-step attention scores once, after the loop; nothing
        # inside the loop reads the joined tensor, so rebuilding it on every
        # step only repeated the same copy.
        attn_scores = torch.cat(attn_scores_per_step, dim=1)
        # srclen x tgtlen x bsz -> bsz x tgtlen x srclen
        attn_scores = attn_scores.transpose(0, 2)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # bottleneck layer
        if hasattr(self, "additional_fc"):
            x = self.additional_fc(x)
            x = F.dropout(x, p=self.dropout_out, training=self.training)
        return x, attn_scores
Exemple #4
0
 def test_nullable(self):
     """Check which maybe_cat calls succeed and which raise RuntimeError."""
     tensor = torch.IntTensor([[1, 2, 3], [4, 5, 6]])

     # Calls expected to succeed: no nullable mask, then masks that allow
     # the None entry in the second position.
     pytorch_utils.maybe_cat([tensor, None], 1)
     for mask in ([True, True], [False, True]):
         pytorch_utils.maybe_cat([tensor, None], 1, nullable=mask)

     # Calls expected to raise: a None entry marked non-nullable, an
     # all-None input, and an empty input.
     failing_calls = (
         (([tensor, None], 1), {"nullable": [False, False]}),
         (([None, None], 1), {}),
         (([], 1), {}),
     )
     for args, kwargs in failing_calls:
         with self.assertRaises(RuntimeError):
             pytorch_utils.maybe_cat(*args, **kwargs)