def test_make_positions(self):
    pad = 1
    left_pad_input = torch.LongTensor([
        [9, 9, 9, 9, 9],
        [1, 9, 9, 9, 9],
        [1, 1, 1, 9, 9],
    ])
    left_pad_output = torch.LongTensor([
        [2, 3, 4, 5, 6],
        [1, 2, 3, 4, 5],
        [1, 1, 1, 2, 3],
    ])
    right_pad_input = torch.LongTensor([
        [9, 9, 9, 9, 9],
        [9, 9, 9, 9, 1],
        [9, 9, 1, 1, 1],
    ])
    right_pad_output = torch.LongTensor([
        [2, 3, 4, 5, 6],
        [2, 3, 4, 5, 1],
        [2, 3, 1, 1, 1],
    ])
    self.assertAlmostEqual(
        left_pad_output,
        utils.make_positions(left_pad_input, pad, left_pad=True),
    )
    self.assertAlmostEqual(
        right_pad_output,
        utils.make_positions(right_pad_input, pad, left_pad=False),
    )
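# For reference, a minimal sketch of a make_positions that satisfies the test
# above. This is an illustrative assumption, not the actual utils.make_positions:
# non-padding symbols are numbered from padding_idx + 1 via a cumulative sum
# over the padding mask, and padding symbols keep padding_idx as their
# position. For inputs padded purely on one side, this numbering reproduces
# both expected tensors above, so left_pad is accepted only for signature
# compatibility in this sketch.
import torch


def make_positions_sketch(tensor, padding_idx, left_pad=False):
    mask = tensor.ne(padding_idx).long()
    # cumsum numbers the real tokens 1..n left to right; padding stays at 0,
    # then the padding_idx offset is added everywhere
    return torch.cumsum(mask, dim=1) * mask + padding_idx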
def forward(
    self,
    input: Tensor,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
    positions: Optional[Tensor] = None,
):
    """Input is expected to be of size [bsz x seqlen]."""
    assert (positions is None) or (
        self.padding_idx is None
    ), "If positions is pre-computed then padding_idx should not be set."
    if positions is None:
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            # Without the int() cast, it doesn't work in some cases when exporting to ONNX
            positions = torch.zeros(
                (1, 1), device=input.device, dtype=input.dtype
            ).fill_(int(self.padding_idx + input.size(1)))
        else:
            positions = utils.make_positions(
                input, self.padding_idx, onnx_trace=self.onnx_trace
            )
    return F.embedding(
        positions,
        self.weight,
        self.padding_idx,
        self.max_norm,
        self.norm_type,
        self.scale_grad_by_freq,
        self.sparse,
    )
def forward(self, input, incremental_state=None, timestep=None):
    """Input is expected to be of size [bsz x seqlen]."""
    bsz, seq_len = torch.onnx.operators.shape_as_tensor(input)
    max_pos = self.padding_idx + 1 + seq_len
    if self.weights is None or max_pos > self.weights.size(0):
        # recompute/expand embeddings if needed
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos,
            self.embedding_dim,
            self.padding_idx,
        )
    self.weights = self.weights.type_as(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        pos = (timestep.int() + 1).long() if timestep is not None else seq_len
        if self.onnx_trace:
            return self.weights[self.padding_idx + pos, :].unsqueeze(1).repeat(bsz, 1, 1)
        return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)

    positions = utils.make_positions(input, self.padding_idx, self.left_pad, self.onnx_trace)
    if self.onnx_trace:
        flat_embeddings = self.weights.detach().index_select(0, positions.view(-1))
        embedding_shape = torch.cat(
            (bsz.view(1), seq_len.view(1), torch.LongTensor([-1]))
        )
        embeddings = torch.onnx.operators.reshape_from_tensor_shape(
            flat_embeddings, embedding_shape
        )
        return embeddings
    return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
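# Several forwards in this file call SinusoidalPositionalEmbedding.get_embedding
# without defining it. A sketch of the usual fairseq-style construction is given
# here for reference (an assumption about this codebase, not copied from it):
# concatenated sin/cos halves rather than the interleaved layout of the original
# Transformer paper, with the padding row zeroed out.
import math

import torch


def get_embedding_sketch(num_embeddings, embedding_dim, padding_idx=None):
    half_dim = embedding_dim // 2
    emb = math.log(10000) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
    emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0)
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1)
    if embedding_dim % 2 == 1:
        # zero-pad the last feature for odd embedding dims
        emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
    if padding_idx is not None:
        emb[padding_idx, :] = 0
    return emb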
def forward(self, input, incremental_state=None):
    """Input is expected to be of size [bsz x seqlen]."""
    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
    else:
        positions = utils.make_positions(input.data, self.padding_idx, self.left_pad)
    return super().forward(positions)
def forward(self, input, incremental_state=None):
    """Input is expected to be of size [bsz x seqlen]."""
    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
    else:
        positions = utils.make_positions(
            input.data, self.padding_idx, self.left_pad, self.onnx_trace
        )
    return super().forward(positions)
def forward(self, input, incremental_state=None, marker=None, mark=2):
    """Input is expected to be of size [bsz x seqlen]."""
    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        positions = input.data.new(1, 1).fill_(self.padding_idx + input.size(1))
    else:
        positions = utils.make_positions(input.data, self.padding_idx,
                                         self.left_pad, marker, mark)
    # Variable is a no-op wrapper on modern PyTorch; kept from the original code
    return super().forward(Variable(positions))
def test_original_make_position_with_padding():
    pad_idx = 100
    test = torch.tensor([
        [1, 1, 2, 3, 99, 4, 100],
        [5, 6, 99, 7, 8, 100, 100],
    ])
    expected_positions = torch.tensor([
        [101, 102, 103, 104, 105, 106, 100],
        [101, 102, 103, 104, 105, 100, 100],
    ])
    calculated_positions = make_positions(test, padding_idx=pad_idx)
    assert torch.all(expected_positions.eq(calculated_positions))
def test_original_make_position():
    pad_idx = 0
    test = torch.tensor([
        [1, 1, 2, 3, 99, 4, 0],
        [5, 6, 99, 7, 8, 0, 0],
    ])
    expected_positions = torch.tensor([
        [1, 2, 3, 4, 5, 6, 0],
        [1, 2, 3, 4, 5, 0, 0],
    ])
    calculated_positions = make_positions(test, padding_idx=pad_idx)
    assert torch.all(expected_positions.eq(calculated_positions))
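# The two tests above exercise the newer signature (padding_idx as a keyword,
# no left_pad flag). The cumulative-sum sketch defined earlier reproduces
# their expectations unchanged; a quick self-check, reusing
# make_positions_sketch from above:
import torch

_test = torch.tensor([[1, 1, 2, 3, 99, 4, 0],
                      [5, 6, 99, 7, 8, 0, 0]])
_expected = torch.tensor([[1, 2, 3, 4, 5, 6, 0],
                          [1, 2, 3, 4, 5, 0, 0]])
assert torch.all(make_positions_sketch(_test, padding_idx=0).eq(_expected))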
def forward(
    self,
    input,
    incremental_state: Optional[Any] = None,
    timestep: Optional[Tensor] = None,
    positions: Optional[Any] = None,
):
    """Input is expected to be of size [bsz x seqlen]."""
    # This is very hacky. Usually when the input subsequence is of size n, we
    # get n positional embeddings. But in order to handle both attending to
    # the current input subsequence and to the previous (cached) one, we need
    # more positional embeddings, and so this is how we do that.
    input_dim = input.shape[1]
    input_mul = int(7000 // input_dim) + 1
    input_for_cat = tuple([input] * input_mul)
    input = torch.cat(input_for_cat, dim=1)

    bspair = torch.onnx.operators.shape_as_tensor(input)
    bsz, seq_len = bspair[0], bspair[1]
    max_pos = self.padding_idx + 1 + seq_len
    if self.weights is None or max_pos > self.weights.size(0):
        # recompute/expand embeddings if needed
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos, self.embedding_dim, self.padding_idx
        )
    self.weights = self.weights.to(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
        if self.onnx_trace:
            return (
                self.weights.index_select(index=self.padding_idx + pos, dim=0)
                .unsqueeze(1)
                .repeat(bsz, 1, 1)
            )
        return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)

    positions = utils.make_positions(input, self.padding_idx, onnx_trace=self.onnx_trace)
    if self.onnx_trace:
        flat_embeddings = self.weights.detach().index_select(0, positions.view(-1))
        embedding_shape = torch.cat(
            (bsz.view(1), seq_len.view(1), torch.tensor([-1], dtype=torch.long))
        )
        embeddings = torch.onnx.operators.reshape_from_tensor_shape(
            flat_embeddings, embedding_shape
        )
        return embeddings
    return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
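# Worked example of the tiling above (the length 512 is an illustrative
# assumption): for a subsequence of 512 tokens, input_mul = 7000 // 512 + 1
# = 14, so the input is tiled to 14 * 512 = 7168 columns, guaranteeing at
# least 7000 positions are available for attending over the current and
# cached subsequences.
assert int(7000 // 512) + 1 == 14 and 14 * 512 == 7168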
def forward(
    self,
    input,
    incremental_state: Optional[Any] = None,
    timestep: Optional[Tensor] = None,
    positions: Optional[Any] = None,
    dict=None,  # NB: shadows the builtin; holds the dictionary passed through to make_positions
):
    """Input is expected to be of size [bsz x seqlen]."""
    bspair = torch.onnx.operators.shape_as_tensor(input)
    bsz, seq_len = bspair[0], bspair[1]
    max_pos = self.padding_idx + 1 + seq_len
    if self.weights is None or max_pos > self.weights.size(0):
        # recompute/expand embeddings if needed
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos, self.embedding_dim, self.padding_idx
        )
    self.weights = self.weights.to(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
        if self.onnx_trace:
            return (
                self.weights.index_select(index=self.padding_idx + pos, dim=0)
                .unsqueeze(1)
                .repeat(bsz, 1, 1),
                self.weights.index_select(index=self.padding_idx + pos, dim=0)
                .unsqueeze(1)
                .repeat(bsz, 1, 1),
            )
        return (
            self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1),
            self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1),
        )

    positions, positions_end = utils.make_positions(
        input, self.padding_idx, onnx_trace=self.onnx_trace, dictionary=dict
    )
    if self.onnx_trace:
        flat_embeddings = self.weights.detach().index_select(0, positions.view(-1))
        embedding_shape = torch.cat(
            (bsz.view(1), seq_len.view(1), torch.tensor([-1], dtype=torch.long))
        )
        embeddings = torch.onnx.operators.reshape_from_tensor_shape(
            flat_embeddings, embedding_shape
        )
        return embeddings
    return (
        self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach(),
        self.weights.index_select(0, positions_end.view(-1)).view(bsz, seq_len, -1).detach(),
    )
def forward(self, input, incremental_state=None, positions=None):
    """Input is expected to be of size [bsz x seqlen]."""
    assert (
        (positions is None) or (self.padding_idx is None)
    ), "If positions is pre-computed then padding_idx should not be set."
    if positions is None:
        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            # Without the int() cast, it doesn't work in some cases when exporting to ONNX
            positions = input.data.new(1, 1).fill_(int(self.padding_idx + input.size(1)))
        else:
            positions = utils.make_positions(
                input, self.padding_idx, onnx_trace=self.onnx_trace,
            )
    return super().forward(positions)
def forward(self, input, incremental_state=None):
    """Input is expected to be of size [bsz x seqlen]."""
    # recompute/expand embeddings if needed
    bsz, seq_len = input.size()
    max_pos = self.padding_idx + 1 + seq_len
    if self.weights is None or max_pos > self.weights.size(0):
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos,
            self.embedding_dim,
            self.padding_idx,
        )
    self.weights = self.weights.type_as(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        return self.weights[self.padding_idx + seq_len, :].expand(bsz, 1, -1)

    positions = utils.make_positions(input.data, self.padding_idx, self.left_pad)
    return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
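# Putting the sketches together: what a forward like the one above computes
# for a right-padded batch. This demo reuses get_embedding_sketch and
# make_positions_sketch defined earlier; the token values and sizes are
# illustrative assumptions.
import torch

padding_idx = 1
tokens = torch.tensor([[5, 6, 7, 1, 1],
                       [5, 6, 7, 8, 9]])  # bsz=2, seqlen=5, pad=1
weights = get_embedding_sketch(padding_idx + 1 + 5, 16, padding_idx)
pos = make_positions_sketch(tokens, padding_idx)
emb = weights.index_select(0, pos.view(-1)).view(2, 5, -1)
assert emb.shape == (2, 5, 16)
# padding positions map to the zeroed padding row of the sinusoid table
assert emb[0, 3].abs().sum() == 0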
def forward(self, input, incremental_state=None, sentOffset=None, prefix=0, decside=False):
    """Input is expected to be of size [bsz x seqlen]."""
    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        positions = input.data.new(1, 1).fill_(
            self.padding_idx + input.size(1)
            + (sentOffset if sentOffset is not None else 0)
        )
    else:
        positions = utils.make_positions(
            input.data, self.padding_idx, self.left_pad,
            prefix=prefix, decside=decside,
        )
    return super().forward(positions)
def forward(self, input, incremental_state=None, length=None, timestep=None, sinpostype=None):
    """Input is expected to be of size [bsz x seqlen]."""
    bsz, seq_len = torch.onnx.operators.shape_as_tensor(input)
    max_pos = self.padding_idx + 1 + seq_len
    if length is not None and sinpostype == 'ratio':
        length4getemb = length
    else:
        length4getemb = None
    if self.weights is None or length4getemb is not None or max_pos > self.weights.size(0):
        # recompute/expand embeddings if needed
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos,
            self.embedding_dim,
            self.padding_idx,
            length4getemb,
        )
    self.weights = self.weights.type_as(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        pos = (timestep.int() + 1).long() if timestep is not None else seq_len
        if length4getemb is None and sinpostype is None:
            if self.onnx_trace:
                return self.weights[self.padding_idx + pos, :].unsqueeze(1).repeat(bsz, 1, 1)
            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
        elif sinpostype == 'absolute':
            # TODO: check whether minuspos, self.padding_idx, and pos are scalars or vectors during decoding
            minuspos = (length.view(-1) + 3) - (self.padding_idx + pos).type_as(length.data)
            return self.weights.index_select(0, minuspos.view(-1)).view(bsz, 1, -1)
        else:
            return self.weights[:, self.padding_idx + pos, :]

    positions = utils.make_positions(input, self.padding_idx, self.left_pad, self.onnx_trace)
    if length4getemb is None and sinpostype is None:
        if self.onnx_trace:
            flat_embeddings = self.weights.detach().index_select(0, positions.view(-1))
            embedding_shape = torch.cat(
                (bsz.view(1), seq_len.view(1), torch.LongTensor([-1]))
            )
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape
            )
            return embeddings
        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
    elif sinpostype == 'absolute':
        # add 3 so the offsets stay in the valid range of positions (without the
        # offset, index -1 raises an error); this corresponds to padding_idx (and left_pad?)
        minuspos = (length.view(-1, 1) + 3).expand(bsz, seq_len) - positions.view(bsz, seq_len)
        return self.weights.index_select(0, minuspos.view(-1)).view(bsz, seq_len, -1).detach()
    else:
        return self.weights.index_select(1, positions[0]).view(bsz, seq_len, -1).detach()
def forward(
    self,
    input,
    incremental_state: Optional[Any] = None,
    length=None,
    timestep: Optional[Tensor] = None,
    positions: Optional[Any] = None,
    sinpostype=None,
):
    """Input is expected to be of size [bsz x seqlen]."""
    bspair = torch.onnx.operators.shape_as_tensor(input)
    bsz, seq_len = bspair[0], bspair[1]
    max_pos = self.padding_idx + 1 + seq_len
    if length is not None and sinpostype == 'ratio':
        length4getemb = length
    else:
        length4getemb = None
    if self.weights is None or length4getemb is not None or max_pos > self.weights.size(0):
        # recompute/expand embeddings if needed
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            max_pos,
            self.embedding_dim,
            self.padding_idx,
            length4getemb,
        )
    self.weights = self.weights.to(self._float_tensor)

    if incremental_state is not None:
        # positions is the same for every token when decoding a single step
        pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
        if length4getemb is None and sinpostype is None:
            if self.onnx_trace:
                return (
                    self.weights.index_select(index=self.padding_idx + pos, dim=0)
                    .unsqueeze(1)
                    .repeat(bsz, 1, 1)
                )
            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
        elif sinpostype == 'absolute':
            minuspos = (length.view(-1) + 3) - (self.padding_idx + pos).type_as(length.data)
            return self.weights.index_select(0, minuspos.view(-1)).view(bsz, 1, -1)
        else:
            return self.weights[:, self.padding_idx + pos, :]

    positions = utils.make_positions(input, self.padding_idx, onnx_trace=self.onnx_trace)
    if length4getemb is None and sinpostype is None:
        if self.onnx_trace:
            flat_embeddings = self.weights.detach().index_select(0, positions.view(-1))
            embedding_shape = torch.cat(
                (bsz.view(1), seq_len.view(1), torch.tensor([-1], dtype=torch.long))
            )
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape
            )
            return embeddings
        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
    elif sinpostype == 'absolute':
        # add 3 so the offsets stay in the valid range of positions (without the
        # offset, index -1 raises an error)
        minuspos = (length.view(-1, 1) + 3).expand(bsz, seq_len) - positions.view(bsz, seq_len)
        return self.weights.index_select(0, minuspos.view(-1)).view(bsz, seq_len, -1).detach()
    else:
        return self.weights.index_select(1, positions[0]).view(bsz, seq_len, -1).detach()
def forward(
    self,
    src_tokens,
    src_lengths: Optional[torch.Tensor] = None,
    return_all_hiddens: bool = False,
    token_embeddings: Optional[torch.Tensor] = None,
):
    """
    Args:
        src_tokens (LongTensor): tokens in the source language of shape
            `(batch, src_len)`
        src_lengths (torch.LongTensor): lengths of each source sentence of
            shape `(batch)`
        return_all_hiddens (bool, optional): also return all of the
            intermediate hidden states (default: False).
        token_embeddings (torch.Tensor, optional): precomputed embeddings;
            default `None` will recompute embeddings

    Returns:
        dict:
            - **encoder_out** (Tensor): the last encoder layer's output of
              shape `(src_len, batch, embed_dim)`
            - **encoder_padding_mask** (ByteTensor): the positions of padding
              elements of shape `(batch, src_len)`
            - **encoder_embedding** (Tensor): the (scaled) embedding lookup
              of shape `(batch, src_len, embed_dim)`
            - **encoder_states** (List[Tensor]): all intermediate hidden
              states of shape `(src_len, batch, embed_dim)`. Only populated
              if *return_all_hiddens* is True.
    """
    if self.lattice:
        x, encoder_embedding = self.forward_embedding_no_pos(src_tokens, token_embeddings)
        # pos_s, pos_e = self.embed_positions(src_tokens, dict=self.dictionary)
        pos_s, pos_e = utils.make_positions(
            src_tokens, self.padding_idx, dictionary=self.dictionary
        )
    else:
        x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # compute padding mask
    encoder_padding_mask = src_tokens.eq(self.padding_idx)

    encoder_states = []

    # encoder layers
    for layer in self.layers:
        if self.lattice:
            x = layer(x, encoder_padding_mask, pos_s=pos_s, pos_e=pos_e)
        else:
            x = layer(x, encoder_padding_mask)
        if return_all_hiddens:
            assert encoder_states is not None
            encoder_states.append(x)

    if self.layer_norm is not None:
        x = self.layer_norm(x)

    # The PyTorch Mobile lite interpreter does not support returning NamedTuple
    # in `forward`, so we use a dictionary instead. TorchScript does not
    # support mixed values, so the values are all lists. The empty list is
    # equivalent to None.
    return {
        "encoder_out": [x],  # T x B x C
        "encoder_padding_mask": [encoder_padding_mask],  # B x T
        "encoder_embedding": [encoder_embedding],  # B x T x C
        "encoder_states": encoder_states,  # List[T x B x C]
        "src_tokens": [],
        "src_lengths": [],
    }
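# A hedged sketch of consuming the dictionary contract above. The helper name
# is hypothetical; it only illustrates how a caller would unpack the
# TorchScript-friendly lists (where an empty list stands in for None).
def unpack_encoder_out(out):
    x = out["encoder_out"][0]                      # T x B x C
    padding_mask = out["encoder_padding_mask"][0]  # B x T
    states = out["encoder_states"]                 # non-empty only with return_all_hiddens=True
    return x, padding_mask, states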