def _generate_lines_with_start_token(self):
    line = Line("")
    line.add_token(self.start_token, "tokens")
    return line
def forward(
    self,
    lines: List[Line],
    c0: torch.FloatTensor,
    h0: torch.FloatTensor,
    encoder_outputs: torch.FloatTensor = None,
    teacher_forcing_ratio: float = 0,
) -> torch.Tensor:
    """
    Parameters
    ----------
    lines : list of Line objects
        Batched tokenized source sentences of shape [batch size].
    c0, h0 : 3d torch.FloatTensor
        Cell and hidden state of the LSTM layer. Each state's shape is
        [n layers * n directions, batch size, hidden dim].
    encoder_outputs : torch.FloatTensor, optional
        Encoder outputs passed through to every decoding step.
    teacher_forcing_ratio : float
        Probability of feeding the ground-truth tokens to the decoder
        instead of its own previous predictions.

    Returns
    -------
    outputs : 3d torch.FloatTensor
        For every line in the batch and every time step, the scores over the
        target vocabulary. Shape [batch size, max length, vocab size].
    """
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    if use_teacher_forcing:
        max_length = max(len(line.tokens["tokens"]) for line in lines)
    else:
        max_length = self.max_length
    batch_size = len(lines)

    # tensor to store the decoder's outputs
    outputs = torch.zeros(max_length, batch_size, self.vocab_size).to(self.device)

    # the last hidden & cell state of the encoder is used as the decoder's initial state
    if use_teacher_forcing:
        # feed the full ground-truth lines through the decoder in one pass
        prediction, _, _ = self.forward_step(
            lines=lines, h0=h0, c0=c0, encoder_outputs=encoder_outputs
        )
        outputs[1:] = prediction.permute(1, 0, 2)[:-1]
    else:
        # start every line in the batch with the start-of-sequence token
        lines = [self._generate_lines_with_start_token()] * batch_size
        for i in range(1, max_length):
            prediction, hn, cn = self.forward_step(
                lines=lines, h0=h0, c0=c0, encoder_outputs=encoder_outputs
            )
            prediction = prediction.squeeze(1)
            outputs[i] = prediction

            # greedily pick the highest-scoring token for each line
            line_token_indexes = prediction.argmax(1)
            line_tokens = [
                self.vocab.idx2token[line_token_index]
                for line_token_index in line_token_indexes.cpu().numpy()
            ]

            # feed the predicted tokens back in as the next decoder input
            lines = []
            for token in line_tokens:
                line = Line("")
                line.add_token(token, "tokens")
                lines.append(line)
            h0, c0 = hn, cn

    outputs = outputs.permute(1, 0, 2)
    return outputs
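# A minimal usage sketch, kept here as a comment. Only the forward() signature above is
# taken from this file; the decoder/encoder construction, the `encoder` object, and the
# assumption that this class is an nn.Module (so calling it invokes forward) are
# illustrative assumptions, not part of this module.
#
#     decoder = ...                              # hypothetical: construct the decoder
#     encoding, (h0, c0) = encoder(lines)        # hypothetical encoder returning states
#     outputs = decoder(
#         lines=lines,
#         c0=c0,
#         h0=h0,
#         encoder_outputs=encoding,
#         teacher_forcing_ratio=0.5,             # use ground-truth tokens ~half the time
#     )
#     # outputs: [batch size, max length, vocab size]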