import torch
from torch.nn.utils.rnn import PackedSequence


def layer_forward(self, x, hx, cell, batch_sizes, reverse=False):
    hx_0 = hx_i = hx
    hx_n, output = [], []
    steps = reversed(range(len(x))) if reverse else range(len(x))
    if self.training:
        # one dropout mask per direction, shared across all timesteps
        hid_mask = SharedDropout.get_mask(hx_0[0], self.dropout)

    for t in steps:
        last_batch_size, batch_size = len(hx_i[0]), batch_sizes[t]
        if last_batch_size < batch_size:
            # the effective batch grows (reverse pass over a packed
            # sequence): top up the state with the initial hidden/cell state
            hx_i = [torch.cat((h, ih[last_batch_size:batch_size]))
                    for h, ih in zip(hx_i, hx_0)]
        else:
            # the effective batch shrinks: set aside the final states of
            # the sequences that have just ended
            hx_n.append([h[batch_size:] for h in hx_i])
            hx_i = [h[:batch_size] for h in hx_i]
        hx_i = list(cell(x[t], hx_i))
        output.append(hx_i[0])
        if self.training:
            # drop only the recurrent connection; the output appended
            # above is left unmasked
            hx_i[0] = hx_i[0] * hid_mask[:batch_size]
    if reverse:
        hx_n = hx_i
        output.reverse()
    else:
        hx_n.append(hx_i)
        hx_n = [torch.cat(h) for h in zip(*reversed(hx_n))]
    output = torch.cat(output)

    return output, hx_n
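# `SharedDropout.get_mask`, used in both methods of this listing, is not shown
# here. Below is a minimal stand-in sketching the idea, assuming the usual
# inverted-dropout scaling; the actual class may differ in detail. The point
# is that the mask is sampled once per layer and direction and then reused at
# every timestep, which makes the dropout "variational" rather than
# independent per step.
class SharedDropout:
    @staticmethod
    def get_mask(x, p):
        # Bernoulli keep-mask with the shape of a single timestep, rescaled
        # by 1 / (1 - p) so expected activations are unchanged
        return x.new_empty(x.shape).bernoulli_(1 - p) / (1 - p)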
def forward(self, sequence, hx=None):
    r"""
    Args:
        sequence (~torch.nn.utils.rnn.PackedSequence):
            A packed variable length sequence.
        hx (~torch.Tensor, ~torch.Tensor):
            A tuple composed of two tensors `h` and `c`.
            `h` of shape ``[num_layers*2, batch_size, hidden_size]`` contains
            the initial hidden state for each element in the batch.
            `c` of shape ``[num_layers*2, batch_size, hidden_size]`` contains
            the initial cell state for each element in the batch.
            If `hx` is not provided, both `h` and `c` default to zero.
            Default: ``None``.

    Returns:
        ~torch.nn.utils.rnn.PackedSequence, (~torch.Tensor, ~torch.Tensor):
            The first is a packed variable length sequence.
            The second is a tuple of tensors `h` and `c`.
            `h` of shape ``[num_layers*2, batch_size, hidden_size]`` contains
            the hidden state for `t = seq_len`.
            Like output, the layers can be separated using
            ``h.view(num_layers, 2, batch_size, hidden_size)`` and similarly
            for `c`.
            `c` of shape ``[num_layers*2, batch_size, hidden_size]`` contains
            the cell state for `t = seq_len`.
    """

    x, batch_sizes = sequence.data, sequence.batch_sizes.tolist()
    batch_size = batch_sizes[0]
    h_n, c_n = [], []

    if hx is None:
        ih = x.new_zeros(self.num_layers * 2, batch_size, self.hidden_size)
        h, c = ih, ih
    else:
        h, c = self.permute_hidden(hx, sequence.sorted_indices)
    h = h.view(self.num_layers, 2, batch_size, self.hidden_size)
    c = c.view(self.num_layers, 2, batch_size, self.hidden_size)

    for i in range(self.num_layers):
        x = torch.split(x, batch_sizes)
        if self.training:
            # one mask per layer, shared by every timestep of the layer input
            mask = SharedDropout.get_mask(x[0], self.dropout)
            x = [xi * mask[:len(xi)] for xi in x]
        x_f, (h_f, c_f) = self.layer_forward(x=x,
                                             hx=(h[i, 0], c[i, 0]),
                                             cell=self.f_cells[i],
                                             batch_sizes=batch_sizes)
        x_b, (h_b, c_b) = self.layer_forward(x=x,
                                             hx=(h[i, 1], c[i, 1]),
                                             cell=self.b_cells[i],
                                             batch_sizes=batch_sizes,
                                             reverse=True)
        x = torch.cat((x_f, x_b), -1)
        h_n.append(torch.stack((h_f, h_b)))
        c_n.append(torch.stack((c_f, c_b)))
    x = PackedSequence(x,
                       sequence.batch_sizes,
                       sequence.sorted_indices,
                       sequence.unsorted_indices)
    hx = torch.cat(h_n, 0), torch.cat(c_n, 0)
    hx = self.permute_hidden(hx, sequence.unsorted_indices)

    return x, hx
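# Neither the constructor nor `permute_hidden` appears in this listing. The
# sketch below reconstructs the state `forward` relies on; the attribute
# names (f_cells, b_cells, hidden_size, num_layers, dropout) are taken from
# their uses above, while the class name `BiLSTM` and the constructor
# signature are assumptions for illustration.
import torch.nn as nn


class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.0):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.f_cells, self.b_cells = nn.ModuleList(), nn.ModuleList()
        for layer in range(num_layers):
            # layer 0 consumes the raw input; deeper layers consume the
            # concatenated forward/backward outputs of the previous layer
            in_size = input_size if layer == 0 else hidden_size * 2
            self.f_cells.append(nn.LSTMCell(in_size, hidden_size))
            self.b_cells.append(nn.LSTMCell(in_size, hidden_size))

    def permute_hidden(self, hx, permutation):
        # reorder (h, c) along the batch dimension; `None` means the packed
        # sequence was already sorted
        if permutation is None:
            return hx
        return tuple(t.index_select(1, permutation) for t in hx)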
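# End-to-end usage sketch under the assumptions above: attach the two listed
# methods to the sketched class, pack a batch of variable-length sequences,
# and check the shapes promised by the docstring.
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

BiLSTM.layer_forward = layer_forward
BiLSTM.forward = forward

batch_size, max_len, input_size, hidden_size = 4, 7, 10, 8
lengths = torch.tensor([7, 5, 3, 2])  # sorted, as enforce_sorted=True expects
inputs = torch.randn(batch_size, max_len, input_size)

lstm = BiLSTM(input_size, hidden_size, num_layers=2, dropout=0.33)
packed = pack_padded_sequence(inputs, lengths, batch_first=True)
out, (h, c) = lstm(packed)

padded, _ = pad_packed_sequence(out, batch_first=True)
assert padded.shape == (batch_size, max_len, hidden_size * 2)
assert h.shape == (2 * 2, batch_size, hidden_size)  # num_layers * 2
assert c.shape == h.shape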