Example #1
    def test_make_positions(self):
        pad = 1
        left_pad_input = torch.LongTensor([
            [9, 9, 9, 9, 9],
            [1, 9, 9, 9, 9],
            [1, 1, 1, 9, 9],
        ])
        left_pad_output = torch.LongTensor([
            [2, 3, 4, 5, 6],
            [1, 2, 3, 4, 5],
            [1, 1, 1, 2, 3],
        ])
        right_pad_input = torch.LongTensor([
            [9, 9, 9, 9, 9],
            [9, 9, 9, 9, 1],
            [9, 9, 1, 1, 1],
        ])
        right_pad_output = torch.LongTensor([
            [2, 3, 4, 5, 6],
            [2, 3, 4, 5, 1],
            [2, 3, 1, 1, 1],
        ])

        self.assertAlmostEqual(
            left_pad_output,
            utils.make_positions(left_pad_input, pad, left_pad=True),
        )
        self.assertAlmostEqual(
            right_pad_output,
            utils.make_positions(right_pad_input, pad, left_pad=False),
        )
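The test above exercises the older make_positions signature that takes a left_pad flag, and presumably relies on a tensor-aware assertAlmostEqual override defined elsewhere in the test class (unittest's built-in version does not accept tensors). Below is a minimal sketch that reproduces the expected outputs; it is an illustrative reimplementation under a hypothetical name (make_positions_ref), not the library's actual code.

import torch

def make_positions_ref(tensor, padding_idx, left_pad=False):
    """Illustrative stand-in: pad tokens keep padding_idx, real tokens are
    numbered consecutively starting at padding_idx + 1."""
    mask = tensor.ne(padding_idx).long()
    if left_pad:
        # With left padding the real tokens sit at the right edge of each row,
        # so shift a full-length arange left by the number of pad tokens.
        positions = torch.arange(1, tensor.size(1) + 1, device=tensor.device)
        positions = positions.expand_as(tensor) - (tensor.size(1) - mask.sum(dim=1, keepdim=True))
    else:
        positions = mask.cumsum(dim=1)
    positions = positions + padding_idx
    return torch.where(mask.bool(), positions, torch.full_like(tensor, padding_idx))

# e.g. make_positions_ref(left_pad_input, pad, left_pad=True) reproduces left_pad_output above.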
Example #3
    def forward(
        self,
        input: Tensor,
        incremental_state: Optional[Dict[str, Dict[str,
                                                   Optional[Tensor]]]] = None,
        positions: Optional[Tensor] = None,
    ):
        """Input is expected to be of size [bsz x seqlen]."""
        assert (positions is None) or (
            self.padding_idx is None
        ), "If positions is pre-computed then padding_idx should not be set."

        if positions is None:
            if incremental_state is not None:
                # positions is the same for every token when decoding a single step
                # Without the int() cast, it doesn't work in some cases when exporting to ONNX
                positions = torch.zeros(
                    (1, 1), device=input.device, dtype=input.dtype).fill_(
                        int(self.padding_idx + input.size(1)))
            else:
                positions = utils.make_positions(input,
                                                 self.padding_idx,
                                                 onnx_trace=self.onnx_trace)
        return F.embedding(
            positions,
            self.weight,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )
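For reference, a self-contained sketch of the same pattern with a simplified stand-in class (TinyLearnedPositionalEmbedding is a hypothetical name, not the module above): positions are derived from the input, looked up via F.embedding, and a single-step incremental call returns exactly one position per sequence.

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyLearnedPositionalEmbedding(nn.Embedding):
    def __init__(self, num_positions, embedding_dim, padding_idx):
        # reserve rows 0..padding_idx plus num_positions real positions
        super().__init__(num_positions + padding_idx + 1, embedding_dim, padding_idx)

    def forward(self, input, incremental_state=None):
        if incremental_state is not None:
            # single decoding step: every sequence is at the same position
            positions = torch.full((1, 1), self.padding_idx + input.size(1),
                                   dtype=torch.long, device=input.device)
        else:
            mask = input.ne(self.padding_idx).int()
            positions = (torch.cumsum(mask, dim=1) * mask).long() + self.padding_idx
        return F.embedding(positions, self.weight, self.padding_idx)

tokens = torch.tensor([[5, 6, 7, 1, 1]])            # 1 is the pad index
emb = TinyLearnedPositionalEmbedding(16, 8, padding_idx=1)
print(emb(tokens).shape)                            # torch.Size([1, 5, 8])
print(emb(tokens, incremental_state={}).shape)      # torch.Size([1, 1, 8])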
Example #4
    def forward(self, input, incremental_state=None, timestep=None):
        """Input is expected to be of size [bsz x seqlen]."""
        bsz, seq_len = torch.onnx.operators.shape_as_tensor(input)
        max_pos = self.padding_idx + 1 + seq_len
        if self.weights is None or max_pos > self.weights.size(0):
            # recompute/expand embeddings if needed
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos,
                self.embedding_dim,
                self.padding_idx,
            )
        self.weights = self.weights.type_as(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = (timestep.int() +
                   1).long() if timestep is not None else seq_len
            if self.onnx_trace:
                return self.weights[self.padding_idx +
                                    pos, :].unsqueeze(1).repeat(bsz, 1, 1)
            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)

        positions = utils.make_positions(input, self.padding_idx,
                                         self.left_pad, self.onnx_trace)
        if self.onnx_trace:
            flat_embeddings = self.weights.detach().index_select(
                0, positions.view(-1))
            embedding_shape = torch.cat(
                (bsz.view(1), seq_len.view(1), torch.LongTensor([-1])))
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape)
            return embeddings
        return self.weights.index_select(0, positions.view(-1)).view(
            bsz, seq_len, -1).detach()
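The snippet calls SinusoidalPositionalEmbedding.get_embedding but does not show it. In fairseq-style implementations the table it builds is the usual sin/cos grid from "Attention Is All You Need", with the padding row zeroed so pad positions embed to the zero vector. A sketch of that construction, written here as a free function for illustration:

import math
import torch

def sinusoidal_table(num_embeddings, embedding_dim, padding_idx=None):
    half_dim = embedding_dim // 2
    # geometric progression of inverse frequencies
    freq = torch.exp(torch.arange(half_dim, dtype=torch.float) * -(math.log(10000.0) / (half_dim - 1)))
    angles = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * freq.unsqueeze(0)
    table = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
    if embedding_dim % 2 == 1:
        # zero-pad the last column for odd dimensions
        table = torch.cat([table, torch.zeros(num_embeddings, 1)], dim=1)
    if padding_idx is not None:
        table[padding_idx, :] = 0  # pad positions embed to the zero vector
    return table  # shape: [num_embeddings, embedding_dim]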
Example #5
    def forward(self, input, incremental_state=None):
        """Input is expected to be of size [bsz x seqlen]."""
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
        else:
            positions = utils.make_positions(input.data, self.padding_idx, self.left_pad)
        return super().forward(positions)
Example #6
    def forward(self, input, incremental_state=None):
        """Input is expected to be of size [bsz x seqlen]."""
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
        else:
            positions = utils.make_positions(input.data, self.padding_idx, self.left_pad, self.onnx_trace)
        return super().forward(positions)
Example #7
    def forward(self, input, incremental_state=None, marker=None, mark=2):
        """Input is expected to be of size [bsz x seqlen]."""
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
        else:
            positions = utils.make_positions(input.data, self.padding_idx, self.left_pad, marker, mark)

        #print(positions.shape, positions)
        # Variable is a legacy wrapper; in current PyTorch the tensor could be passed directly.
        return super().forward(Variable(positions))
Example #8
def test_original_make_position_with_padding():
    pad_idx = 100
    test = torch.tensor([
        [1, 1, 2, 3, 99, 4, 100],
        [5, 6, 99, 7, 8, 100, 100],
    ])
    expected_positions = torch.tensor([
        [101, 102, 103, 104, 105, 106, 100],
        [101, 102, 103, 104, 105, 100, 100],
    ])
    calculated_positions = make_positions(test, padding_idx=pad_idx)
    assert torch.all(expected_positions.eq(calculated_positions))
Example #9
def test_original_make_position():
    pad_idx = 0
    test = torch.tensor([
        [1, 1, 2, 3, 99, 4, 0],
        [5, 6, 99, 7, 8, 0, 0],
    ])
    expected_positions = torch.tensor([
        [1, 2, 3, 4, 5, 6, 0],
        [1, 2, 3, 4, 5, 0, 0],
    ])
    calculated_positions = make_positions(test, padding_idx=pad_idx)
    assert torch.all(expected_positions.eq(calculated_positions))
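Both this test and the previous one are consistent with a cumulative-sum implementation of make_positions in its newer two-argument form: non-pad tokens are numbered from padding_idx + 1 onward and pad tokens keep padding_idx. A minimal sketch (the same idea as the right-pad branch of the earlier sketch; the name is again hypothetical, not necessarily the code under test):

import torch

def make_positions_ref(tensor, padding_idx):
    mask = tensor.ne(padding_idx).int()
    # cumsum numbers the non-pad tokens 1..n; multiplying by the mask zeroes the
    # pad slots, and adding padding_idx shifts everything into the valid range
    return (torch.cumsum(mask, dim=1) * mask).long() + padding_idx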
Example #10
    def forward(
        self,
        input,
        incremental_state: Optional[Any] = None,
        timestep: Optional[Tensor] = None,
        positions: Optional[Any] = None,
    ):
        """Input is expected to be of size [bsz x seqlen]."""
        # This is very hacky. Usually when the input subsequence is of size n,
        # we get n positional embeddings. But in order to handle both attending
        # to the current input subsequence and to the previous (cached) one, we
        # need more positional embeddings, and so this is how we do that.
        input_dim = input.shape[1]
        input_mul = int(7000 // input_dim) + 1
        input_for_cat = tuple([input] * input_mul)
        input = torch.cat(input_for_cat, dim=1)

        bspair = torch.onnx.operators.shape_as_tensor(input)
        bsz, seq_len = bspair[0], bspair[1]
        max_pos = self.padding_idx + 1 + seq_len
        if self.weights is None or max_pos > self.weights.size(0):
            # recompute/expand embeddings if needed
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos, self.embedding_dim, self.padding_idx)
        self.weights = self.weights.to(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
            if self.onnx_trace:
                return (self.weights.index_select(index=self.padding_idx + pos,
                                                  dim=0).unsqueeze(1).repeat(
                                                      bsz, 1, 1))
            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)

        positions = utils.make_positions(input,
                                         self.padding_idx,
                                         onnx_trace=self.onnx_trace)
        if self.onnx_trace:
            flat_embeddings = self.weights.detach().index_select(
                0, positions.view(-1))
            embedding_shape = torch.cat((bsz.view(1), seq_len.view(1),
                                         torch.tensor([-1], dtype=torch.long)))
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape)
            return embeddings
        return (self.weights.index_select(0, positions.view(-1)).view(
            bsz, seq_len, -1).detach())
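To make the tiling concrete: with an input subsequence of length 512, input_mul is int(7000 // 512) + 1 = 14, so the input is repeated to 14 × 512 = 7168 columns, which the code treats as enough positional embeddings to cover both the current subsequence and the cached one.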
Example #11
    def forward(self,
                input,
                incremental_state: Optional[Any] = None,
                timestep: Optional[Tensor] = None,
                positions: Optional[Any] = None,
                dict=None):
        """Input is expected to be of size [bsz x seqlen]."""
        # logging.info("INPUT:")
        # logging.info(input)
        bspair = torch.onnx.operators.shape_as_tensor(input)
        bsz, seq_len = bspair[0], bspair[1]
        max_pos = self.padding_idx + 1 + seq_len
        if self.weights is None or max_pos > self.weights.size(0):
            # recompute/expand embeddings if needed
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos, self.embedding_dim, self.padding_idx)
        self.weights = self.weights.to(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
            if self.onnx_trace:
                return (self.weights.index_select(index=self.padding_idx + pos,
                                                  dim=0).unsqueeze(1).repeat(
                                                      bsz, 1, 1),
                        self.weights.index_select(index=self.padding_idx + pos,
                                                  dim=0).unsqueeze(1).repeat(
                                                      bsz, 1, 1))
            return self.weights[self.padding_idx + pos, :].expand(
                bsz, 1, -1), self.weights[self.padding_idx + pos, :].expand(
                    bsz, 1, -1)

        positions, positions_end = utils.make_positions(
            input,
            self.padding_idx,
            onnx_trace=self.onnx_trace,
            dictionary=dict)
        if self.onnx_trace:
            flat_embeddings = self.weights.detach().index_select(
                0, positions.view(-1))
            embedding_shape = torch.cat((bsz.view(1), seq_len.view(1),
                                         torch.tensor([-1], dtype=torch.long)))
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape)
            return embeddings
        return (self.weights.index_select(0, positions.view(-1)).view(
            bsz, seq_len,
            -1).detach(), self.weights.index_select(
                0, positions_end.view(-1)).view(bsz, seq_len, -1).detach())
Example #12
    def forward(self, input, incremental_state=None, positions=None):
        """Input is expected to be of size [bsz x seqlen]."""
        assert (
            (positions is None) or (self.padding_idx is None)
        ), "If positions is pre-computed then padding_idx should not be set."

        if positions is None:
            if incremental_state is not None:
                # positions is the same for every token when decoding a single step
                # Without the int() cast, it doesn't work in some cases when exporting to ONNX
                positions = input.data.new(1, 1).fill_(int(self.padding_idx + input.size(1)))
            else:
                positions = utils.make_positions(
                    input, self.padding_idx, onnx_trace=self.onnx_trace,
                )
        return super().forward(positions)
Example #13
    def forward(self, input, incremental_state=None):
        """Input is expected to be of size [bsz x seqlen]."""
        # recompute/expand embeddings if needed
        bsz, seq_len = input.size()
        max_pos = self.padding_idx + 1 + seq_len
        if self.weights is None or max_pos > self.weights.size(0):
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos,
                self.embedding_dim,
                self.padding_idx,
            )
        self.weights = self.weights.type_as(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            return self.weights[self.padding_idx + seq_len, :].expand(bsz, 1, -1)

        positions = utils.make_positions(input.data, self.padding_idx, self.left_pad)
        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
Example #14
    def forward(self,
                input,
                incremental_state=None,
                sentOffset=None,
                prefix=0,
                decside=False):
        """Input is expected to be of size [bsz x seqlen]."""
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            positions = input.data.new(
                1, 1).fill_(self.padding_idx + input.size(1) +
                            (sentOffset if sentOffset is not None else 0))
        else:
            positions = utils.make_positions(input.data,
                                             self.padding_idx,
                                             self.left_pad,
                                             prefix=prefix,
                                             decside=decside)

        return super().forward(positions)
Example #15
    def forward(self,
                input,
                incremental_state=None,
                length=None,
                timestep=None,
                sinpostype=None):
        """Input is expected to be of size [bsz x seqlen]."""
        bsz, seq_len = torch.onnx.operators.shape_as_tensor(input)
        max_pos = self.padding_idx + 1 + seq_len
        if length is not None and sinpostype == 'ratio':
            length4getemb = length
        else:
            length4getemb = None
        if self.weights is None or length4getemb is not None or max_pos > self.weights.size(
                0):
            # recompute/expand embeddings if needed
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos,
                self.embedding_dim,
                self.padding_idx,
                length4getemb,
            )
        self.weights = self.weights.type_as(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = (timestep.int() +
                   1).long() if timestep is not None else seq_len
            if length4getemb is None and sinpostype == None:
                if self.onnx_trace:
                    return self.weights[self.padding_idx +
                                        pos, :].unsqueeze(1).repeat(bsz, 1, 1)
                return self.weights[self.padding_idx + pos, :].expand(
                    bsz, 1, -1)
            elif sinpostype == 'absolute':
                #todo: check in decoding, minus pos, self.padding_idx, and pos are scalar or vector
                minuspos = (length.view(-1) + 3) - (self.padding_idx +
                                                    pos).type_as(length.data)
                return self.weights.index_select(0, minuspos.view(-1)).view(
                    bsz, 1, -1)
            else:
                return self.weights[:, self.padding_idx + pos, :]

        positions = utils.make_positions(input, self.padding_idx,
                                         self.left_pad, self.onnx_trace)
        if length4getemb is None and sinpostype == None:
            if self.onnx_trace:
                flat_embeddings = self.weights.detach().index_select(
                    0, positions.view(-1))
                embedding_shape = torch.cat(
                    (bsz.view(1), seq_len.view(1), torch.LongTensor([-1])))
                embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                    flat_embeddings, embedding_shape)
                return embeddings
            return self.weights.index_select(0, positions.view(-1)).view(
                bsz, seq_len, -1).detach()
        elif sinpostype == 'absolute':
            #add 3 to set range value with positions (if no value addition, cause error due to index -1)
            #correspondence to padding_idx (and left_pad?)
            minuspos = (length.view(-1, 1) + 3).expand(
                bsz, seq_len) - positions.view(bsz, seq_len)
            return self.weights.index_select(0, minuspos.view(-1)).view(
                bsz, seq_len, -1).detach()
        else:
            return self.weights.index_select(1, positions[0]).view(
                bsz, seq_len, -1).detach()
Example #16
    def forward(
        self,
        input,
        incremental_state: Optional[Any] = None,
        length=None,
        timestep: Optional[Tensor] = None,
        positions: Optional[Any] = None,
        sinpostype=None,
    ):
        """Input is expected to be of size [bsz x seqlen]."""
        bspair = torch.onnx.operators.shape_as_tensor(input)
        bsz, seq_len = bspair[0], bspair[1]
        max_pos = self.padding_idx + 1 + seq_len
        if length is not None and sinpostype == 'ratio':
            length4getemb = length
        else:
            length4getemb = None
        if self.weights is None or length4getemb is not None or max_pos > self.weights.size(
                0):
            # recompute/expand embeddings if needed
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos,
                self.embedding_dim,
                self.padding_idx,
                length4getemb,
            )
        self.weights = self.weights.to(self._float_tensor)

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
            if length4getemb is None and sinpostype == None:
                if self.onnx_trace:
                    return (self.weights.index_select(
                        index=self.padding_idx + pos,
                        dim=0).unsqueeze(1).repeat(bsz, 1, 1))
                return self.weights[self.padding_idx + pos, :].expand(
                    bsz, 1, -1)
            elif sinpostype == 'absolute':
                minuspos = (length.view(-1) + 3) - (self.padding_idx +
                                                    pos).type_as(length.data)
                return self.weights.index_select(0, minuspos.view(-1)).view(
                    bsz, 1, -1)
            else:
                return self.weights[:, self.padding_idx + pos, :]

        positions = utils.make_positions(input,
                                         self.padding_idx,
                                         onnx_trace=self.onnx_trace)
        if length4getemb is None and sinpostype == None:
            if self.onnx_trace:
                flat_embeddings = self.weights.detach().index_select(
                    0, positions.view(-1))
                embedding_shape = torch.cat((bsz.view(1), seq_len.view(1),
                                             torch.tensor([-1],
                                                          dtype=torch.long)))
                embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                    flat_embeddings, embedding_shape)
                return embeddings
            return (self.weights.index_select(0, positions.view(-1)).view(
                bsz, seq_len, -1).detach())
        elif sinpostype == 'absolute':
            #add 3 to set range value with positions (if no value addition, cause error due to index -1)
            minuspos = (length.view(-1, 1) + 3).expand(
                bsz, seq_len) - positions.view(bsz, seq_len)
            return self.weights.index_select(0, minuspos.view(-1)).view(
                bsz, seq_len, -1).detach()
        else:
            return self.weights.index_select(1, positions[0]).view(
                bsz, seq_len, -1).detach()
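To see what the 'absolute' branch indexes, take an illustrative row with four real tokens, length = 4 and padding_idx = 1 (values chosen only for this example): positions are 2, 3, 4, 5, so minuspos = (length + 3) - positions = 5, 4, 3, 2. The table is read back-to-front, which presumably encodes distance to the end of the sentence rather than distance from its start; the + 3 offset keeps the indices positive, as the code comment notes.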
Example #17
    def forward(
        self,
        src_tokens,
        src_lengths: Optional[torch.Tensor] = None,
        return_all_hiddens: bool = False,
        token_embeddings: Optional[torch.Tensor] = None,
    ):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (torch.LongTensor): lengths of each source sentence of
                shape `(batch)`
            return_all_hiddens (bool, optional): also return all of the
                intermediate hidden states (default: False).
            token_embeddings (torch.Tensor, optional): precomputed embeddings
                default `None` will recompute embeddings

        Returns:
            dict:
                - **encoder_out** (Tensor): the last encoder layer's output of
                  shape `(src_len, batch, embed_dim)`
                - **encoder_padding_mask** (ByteTensor): the positions of
                  padding elements of shape `(batch, src_len)`
                - **encoder_embedding** (Tensor): the (scaled) embedding lookup
                  of shape `(batch, src_len, embed_dim)`
                - **encoder_states** (List[Tensor]): all intermediate
                  hidden states of shape `(src_len, batch, embed_dim)`.
                  Only populated if *return_all_hiddens* is True.
        """
        # logging.info(src_tokens)
        # logging.info(self.dictionary.string(src_tokens))
        # logging.info(self.dictionary.indices)
        if self.lattice:
            x, encoder_embedding = self.forward_embedding_no_pos(src_tokens, token_embeddings)
            # pos_s, pos_e = self.embed_positions(src_tokens, dict=self.dictionary)
            pos_s, pos_e = utils.make_positions(
                src_tokens, self.padding_idx, dictionary=self.dictionary
            )
        else:
            x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings)
        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # compute padding mask
        encoder_padding_mask = src_tokens.eq(self.padding_idx)

        encoder_states = []

        # encoder layers
        for layer in self.layers:
            if self.lattice:
                x = layer(x, encoder_padding_mask, pos_s=pos_s, pos_e=pos_e)
            else:
                x = layer(x, encoder_padding_mask)

            if return_all_hiddens:
                assert encoder_states is not None
                encoder_states.append(x)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        # The PyTorch Mobile lite interpreter does not support returning NamedTuple in
        # `forward`, so we use a dictionary instead.
        # TorchScript does not support mixed values so the values are all lists.
        # The empty list is equivalent to None.
        return {
            "encoder_out": [x],  # T x B x C
            "encoder_padding_mask": [encoder_padding_mask],  # B x T
            "encoder_embedding": [encoder_embedding],  # B x T x C
            "encoder_states": encoder_states,  # List[T x B x C]
            "src_tokens": [],
            "src_lengths": [],
        }
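A hypothetical usage sketch for the dictionary returned above; encoder stands for an instance of this class and src_tokens for a LongTensor of shape (batch, src_len), and the shapes follow the inline comments in the return statement.

# encoder: an instance of this encoder class (assumed constructed elsewhere)
# src_tokens: LongTensor of shape (batch, src_len)
out = encoder(src_tokens, return_all_hiddens=True)

last_hidden = out["encoder_out"][0]            # T x B x C
pad_mask = out["encoder_padding_mask"][0]      # B x T
token_emb = out["encoder_embedding"][0]        # B x T x C
all_states = out["encoder_states"]             # list of T x B x C tensors, one per layer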