def forward_step(
    self,
    decoder_inputs,
    decoder_input_lengths,
    encoder_outputs,
    encoder_output_lengths,
    positional_encoding_length,
) -> Tensor:
    # Combine the padding mask and the look-ahead (subsequent) mask for decoder self-attention
    dec_self_attn_pad_mask = get_attn_pad_mask(decoder_inputs, decoder_input_lengths, decoder_inputs.size(1))
    dec_self_attn_subsequent_mask = get_attn_subsequent_mask(decoder_inputs)
    self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)

    # Mask padded encoder positions for encoder-decoder attention
    encoder_attn_mask = get_attn_pad_mask(encoder_outputs, encoder_output_lengths, decoder_inputs.size(1))

    outputs = self.embedding(decoder_inputs) + self.positional_encoding(positional_encoding_length)
    outputs = self.input_dropout(outputs)

    for layer in self.layers:
        outputs, self_attn, memory_attn = layer(
            inputs=outputs,
            encoder_outputs=encoder_outputs,
            self_attn_mask=self_attn_mask,
            encoder_attn_mask=encoder_attn_mask,
        )

    return outputs
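# A minimal sketch of the two mask helpers used above, assuming the common
# convention that a True entry marks a key position attention should ignore.
# Neither helper is defined in this file, so this is an illustrative
# reconstruction rather than the project's actual implementation.
import torch
from torch import Tensor


def get_attn_pad_mask(inputs: Tensor, input_lengths: Tensor, expand_length: int) -> Tensor:
    """Mask padded key positions: (batch, expand_length, seq_length), True where padded."""
    batch_size, seq_length = inputs.size(0), inputs.size(1)
    positions = torch.arange(seq_length, device=inputs.device).expand(batch_size, seq_length)
    pad_mask = positions >= input_lengths.to(inputs.device).unsqueeze(1)
    return pad_mask.unsqueeze(1).expand(-1, expand_length, -1)


def get_attn_subsequent_mask(inputs: Tensor) -> Tensor:
    """Upper-triangular mask that hides future positions in decoder self-attention."""
    seq_length = inputs.size(1)
    subsequent_mask = torch.triu(
        torch.ones(seq_length, seq_length, device=inputs.device, dtype=torch.bool), diagonal=1
    )
    return subsequent_mask.unsqueeze(0).expand(inputs.size(0), -1, -1)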
def forward(self, targets: Tensor, input_lengths: Optional[Tensor] = None, memory: Tensor = None) -> Tuple[Tensor, Tensor, Tensor]:
    self_attns, memory_attns = list(), list()

    # True at non-padded target positions
    non_pad_mask = get_pad_mask(targets, pad_id=self.pad_id).eq(False)
    # Block attention to padded and future target positions
    self_attn_mask = get_attn_pad_mask(targets, self.pad_id) | get_subsequent_mask(targets)
    # Block attention to padded encoder (memory) positions
    memory_mask = get_pad_mask(memory, input_lengths).squeeze(-1).unsqueeze(1).expand(-1, targets.size(1), -1)

    output = self.input_dropout(self.embedding(targets) * self.logit_scale + self.pos_encoding(targets.size(1)))

    for layer in self.layers:
        output, self_attn, memory_attn = layer(output, memory, non_pad_mask, self_attn_mask, memory_mask)
        self_attns.append(self_attn)
        memory_attns.append(memory_attn)

    return output, self_attns, memory_attns
def forward(self, inputs: Tensor, input_lengths: Tensor = None) -> Tensor:
    # Block attention to padded positions
    self_attn_mask = get_attn_pad_mask(inputs, input_lengths, inputs.size(1))

    output = self.input_layer_norm(self.input_proj(inputs)) + self.positional_encoding(inputs.size(1))
    output = self.input_dropout(output)

    for layer in self.layers:
        output, attn = layer(output, self_attn_mask)

    return output
def forward(self, inputs: Tensor, input_lengths: Optional[Any] = None, memory: Tensor = None) -> Tensor:
    batch_size, output_length = inputs.size(0), inputs.size(1)

    # Decoder self-attention mask (padding + look-ahead) and encoder-decoder attention mask
    self_attn_mask = get_decoder_self_attn_mask(inputs, inputs, self.pad_id)
    memory_mask = get_attn_pad_mask(memory, input_lengths, output_length)

    output = self.embedding(inputs) + self.positional_encoding(output_length)
    output = self.input_dropout(output)

    for layer in self.layers:
        output, self_attn, memory_attn = layer(output, memory, self_attn_mask, memory_mask)

    return output
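# A minimal sketch of `get_decoder_self_attn_mask`, assuming it simply ORs a
# key-padding mask (built from the pad token id) with the usual look-ahead
# mask. The helper is not shown in this file, so treat this as an illustrative
# reconstruction under that assumption.
import torch
from torch import Tensor


def get_decoder_self_attn_mask(seq_k: Tensor, seq_q: Tensor, pad_id: int) -> Tensor:
    """Return a (batch, len_q, len_k) mask that is True where attention is blocked."""
    batch_size, len_k = seq_k.size(0), seq_k.size(1)
    len_q = seq_q.size(1)
    # Block attention to padded keys
    pad_mask = seq_k.eq(pad_id).unsqueeze(1).expand(-1, len_q, -1)
    # Block attention to future positions
    subsequent_mask = torch.triu(
        torch.ones(len_q, len_k, device=seq_k.device, dtype=torch.bool), diagonal=1
    ).unsqueeze(0).expand(batch_size, -1, -1)
    return pad_mask | subsequent_mask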
def forward(self, inputs: Tensor, input_lengths: Optional[Any] = None, memory: Tensor = None):
    self_attns, memory_attns = list(), list()
    batch_size, output_length = inputs.size(0), inputs.size(1)

    non_pad_mask = get_pad_mask(inputs, pad_id=self.pad_id).eq(False)
    self_attn_mask = get_decoder_self_attn_mask(inputs, inputs, self.pad_id)
    memory_mask = get_attn_pad_mask(memory, input_lengths, output_length)

    output = self.input_dropout(self.embedding(inputs) + self.positional_encoding(inputs.size(1)))

    for layer in self.layers:
        output, self_attn, memory_attn = layer(output, memory, non_pad_mask, self_attn_mask, memory_mask)
        self_attns.append(self_attn)
        memory_attns.append(memory_attn)

    return output, self_attns, memory_attns
def forward(self, inputs: Tensor, input_lengths: Tensor = None):
    """
    Args:
        inputs: BxT_inputxD
        input_lengths: Bx1
    """
    self_attns = list()

    # True at non-padded input positions
    non_pad_mask = get_pad_mask(inputs, input_lengths=input_lengths).eq(False)
    # Block attention to padded positions
    self_attn_mask = get_attn_pad_mask(inputs, input_lengths, inputs.size(1))

    output = self.input_dropout(self.input_norm(self.input_proj(inputs)) + self.positional_encoding(inputs.size(1)))

    for layer in self.layers:
        output, attn = layer(output, non_pad_mask, self_attn_mask)
        self_attns.append(attn)

    return output, self_attns
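# A minimal sketch of the sinusoidal positional encoding assumed by the
# `self.positional_encoding(length)` calls above: invoked with a length, it
# returns the first `length` precomputed positions shaped (1, length, d_model)
# so they broadcast over the batch. This follows the standard
# "Attention Is All You Need" formulation; the project's own module may differ.
import math

import torch
import torch.nn as nn
from torch import Tensor


class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int = 512, max_length: int = 5000) -> None:
        super().__init__()
        pe = torch.zeros(max_length, d_model)
        position = torch.arange(0, max_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, max_length, d_model)

    def forward(self, length: int) -> Tensor:
        return self.pe[:, :length]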
def forward(self, targets: Tensor, encoder_outputs: Tensor, encoder_output_lengths: Tensor) -> Tensor:
    """
    Forward propagate `targets` and `encoder_outputs` for training.

    Args:
        targets (torch.LongTensor): A target sequence passed to the decoder.
            `IntTensor` of size ``(batch, seq_length)``
        encoder_outputs (torch.FloatTensor): An output sequence of the encoder.
            `FloatTensor` of size ``(batch, seq_length, dimension)``
        encoder_output_lengths: The length of encoder outputs. ``(batch)``

    Returns:
        * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
    """
    batch_size = targets.size(0)

    # Remove the <eos> token from the targets before decoding
    targets = targets[targets != self.eos_id].view(batch_size, -1)
    target_length = targets.size(1)

    # Decoder self-attention mask (padding + look-ahead) and encoder-decoder attention mask
    self_attn_mask = get_decoder_self_attn_mask(targets, targets, self.pad_id)
    encoder_outputs_mask = get_attn_pad_mask(encoder_outputs, encoder_output_lengths, target_length)

    outputs = self.embedding(targets) + self.positional_encoding(target_length)
    outputs = self.input_dropout(outputs)

    for layer in self.layers:
        outputs, self_attn, memory_attn = layer(
            inputs=outputs,
            encoder_outputs=encoder_outputs,
            self_attn_mask=self_attn_mask,
            encoder_outputs_mask=encoder_outputs_mask,
        )

    # Project to the vocabulary and return log probabilities
    predicted_log_probs = self.fc(outputs).log_softmax(dim=-1)

    return predicted_log_probs
def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Forward propagate `inputs` for encoder training.

    Args:
        inputs (torch.FloatTensor): An input sequence passed to the encoder.
            Typically this will be a padded `FloatTensor` of size
            ``(batch, seq_length, dimension)``.
        input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``

    Returns:
        (Tensor, Tensor, Tensor)

        * outputs: An output sequence of the encoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
        * output_lengths: The length of encoder outputs. ``(batch)``
        * encoder_log_probs: Log probability of encoder outputs, which will be
            passed to the CTC loss. If joint_ctc_attention is False, returns None.
    """
    encoder_log_probs = None

    # Pass the inputs through the convolutional front-end
    features, output_lengths = self.conv(inputs, input_lengths)

    # Block attention to padded positions
    self_attn_mask = get_attn_pad_mask(features, output_lengths, features.size(1))

    outputs = self.input_layer_norm(self.input_proj(features)) + self.positional_encoding(features.size(1))
    outputs = self.input_dropout(outputs)

    for layer in self.layers:
        outputs, attn = layer(outputs, self_attn_mask)

    if self.joint_ctc_attention:
        # Project encoder outputs to the vocabulary for the CTC branch
        encoder_log_probs = self.fc(outputs.transpose(1, 2)).log_softmax(dim=-1)

    return outputs, output_lengths, encoder_log_probs
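# A minimal usage sketch for the joint CTC-attention branch above, assuming the
# returned `encoder_log_probs` has shape (batch, seq_length, num_classes) after
# the final log_softmax. The shapes, lengths, and blank id below are made-up
# illustrative values, not the project's configuration.
import torch
import torch.nn as nn

batch_size, seq_length, num_classes = 4, 50, 32
encoder_log_probs = torch.randn(batch_size, seq_length, num_classes).log_softmax(dim=-1)
output_lengths = torch.full((batch_size,), seq_length, dtype=torch.long)
targets = torch.randint(1, num_classes, (batch_size, 20), dtype=torch.long)
target_lengths = torch.full((batch_size,), 20, dtype=torch.long)

# nn.CTCLoss expects (seq_length, batch, num_classes) log-probabilities
ctc_criterion = nn.CTCLoss(blank=0, zero_infinity=True)
ctc_loss = ctc_criterion(encoder_log_probs.transpose(0, 1), targets, output_lengths, target_lengths)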