import torch
from torch.nn.utils.rnn import pad_sequence

def pad_and_mask(self, tensors):
    # Lengths of the individual (unpadded) sequences in the batch
    lengths = torch.tensor([len(s) for s in tensors], device=self.device)
    max_length = torch.max(lengths)
    # Padding mask (valid vs. padded positions) and causal mask over future timesteps.
    # pad_mask and subsequent_mask are project helpers defined elsewhere.
    pad_m = pad_mask(lengths, max_length=max_length, device=self.device)
    sub_m = subsequent_mask(max_length)
    # Pad all sequences to max_length and move the batch to the target device
    tensors = pad_sequence(
        tensors, batch_first=True, padding_value=self.pad_indx
    ).to(self.device)
    return tensors, pad_m, sub_m
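The helpers pad_mask and subsequent_mask are defined elsewhere in the project and are not shown here; the signatures and return shapes below are assumptions. A minimal sketch of the behavior the method above relies on (a boolean validity mask per sequence and a lower-triangular causal mask) could look like this:

import torch

def pad_mask(lengths, max_length=None, device="cpu"):
    # True at valid (non-padded) positions; shape (batch, max_length)
    max_length = int(max_length) if max_length is not None else int(lengths.max())
    positions = torch.arange(max_length, device=device).unsqueeze(0)  # (1, max_length)
    return positions < lengths.to(device).unsqueeze(1)                # (batch, max_length)

def subsequent_mask(max_length):
    # Lower-triangular mask that blocks attention to future timesteps
    max_length = int(max_length)
    return torch.tril(torch.ones(max_length, max_length, dtype=torch.bool))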
def forward(self, x, lengths):
    x = self.embed(x)
    out, last_hidden, _ = self.rnn(x, lengths)
    if self.attention is not None:
        # Attend over the RNN outputs, masking padded positions,
        # then pool by summing the attended outputs over time
        out, _ = self.attention(
            out, attention_mask=pad_mask(lengths, device=self.device)
        )
        out = out.sum(1)
    else:
        # No attention: use the last hidden state as the representation
        out = last_hidden
    return out
def forward(self, x, lengths):
    x = self.embed(x)
    out, last_out, hidden = self.rnn(x, lengths)
    if self.attention is not None:
        # Attend over the RNN outputs, masking padded positions,
        # then pool by summing the attended outputs over time
        out, _ = self.attention(
            out, attention_mask=pad_mask(lengths, device=self.device)
        )
        out = out.sum(1)
    else:
        out = last_out
    # The word RNN returns as output the output of the last unpadded timestep
    # (attention may have been applied), along with the hidden state.
    return out, hidden
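The two branches pool the sequence differently: with attention, the masked, attention-weighted outputs are summed over the time dimension; without it, the last unpadded RNN output (or hidden state) is used directly. A small illustrative sketch, with made-up shapes and random stand-in attention weights rather than the module's actual attention layer:

import torch

batch, seq_len, hidden_size = 4, 10, 32
out = torch.randn(batch, seq_len, hidden_size)                 # per-timestep RNN outputs
weights = torch.softmax(torch.randn(batch, seq_len), dim=-1)   # stand-in attention weights
attended = weights.unsqueeze(-1) * out                         # weight each timestep
pooled_with_attention = attended.sum(1)                        # (batch, hidden_size), as in the attention branch
pooled_without_attention = out[:, -1, :]                       # stand-in for last_out / last_hidden

Either way, the result is a fixed-size representation per sequence, independent of its length.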