def forward(
        self,
        inputs: PackedSequence,  # pylint: disable=arguments-differ
        # pylint: disable=unused-argument
        initial_state: torch.Tensor = None
    ) -> Tuple[PackedSequence, torch.Tensor]:
        """
        Parameters
        ----------
        inputs : ``PackedSequence``, required.
            A batch first ``PackedSequence`` to run the stacked LSTM over.
        initial_state : Tuple[torch.Tensor, torch.Tensor], optional, (default = None)
            Currently, this is ignored.

        Returns
        -------
        output_sequence : ``PackedSequence``
            The encoded sequence of shape (batch_size, sequence_length, hidden_size)
        final_states: ``torch.Tensor``
            The per-layer final (state, memory) states of the LSTM, each with shape
            (num_layers, batch_size, hidden_size).
        """
        inputs, lengths = pad_packed_sequence(inputs, batch_first=True)

        # Kernel takes sequence length first tensors.
        inputs = inputs.transpose(0, 1)

        sequence_length, batch_size, _ = inputs.size()
        accumulator_shape = [
            self.num_layers, sequence_length + 1, batch_size, self.hidden_size
        ]
        state_accumulator = inputs.new_zeros(*accumulator_shape)
        memory_accumulator = inputs.new_zeros(*accumulator_shape)

        dropout_weights = inputs.new_ones(self.num_layers, batch_size,
                                          self.hidden_size)
        if self.training:
            # Normalize by 1 - dropout_prob to preserve the output statistics of the layer.
            dropout_weights.bernoulli_(1 - self.recurrent_dropout_probability)\
                .div_((1 - self.recurrent_dropout_probability))

        # Uninitialized buffer the kernel writes the per-timestep gate activations into.
        # (``new_tensor`` with a shape tuple would build a 4-element vector, not a buffer.)
        gates = inputs.new_empty(self.num_layers, sequence_length,
                                 batch_size, 6 * self.hidden_size)

        lengths_variable = torch.LongTensor(lengths)
        implementation = _AlternatingHighwayLSTMFunction(
            self.input_size,
            self.hidden_size,
            num_layers=self.num_layers,
            train=self.training)
        output, _ = implementation(inputs, self.weight, self.bias,
                                   state_accumulator, memory_accumulator,
                                   dropout_weights, lengths_variable, gates)

        # TODO(Mark): Also return the state here by using index_select with the lengths so we can use
        # it as a Seq2VecEncoder.
        output = output.transpose(0, 1)
        output = pack_padded_sequence(output, lengths, batch_first=True)
        return output, None
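Usage note: every forward() in this listing consumes and returns a PackedSequence. The minimal driver below shows that calling convention, with a plain nn.LSTM standing in for the kernel-backed encoder above (which carries its own weights and normally needs a GPU); only the pack / call / unpack pattern is meant to carry over, and the stand-in module is an assumption, not the encoder itself.

import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Stand-in for the highway-LSTM encoder; illustrates the interface only.
lstm = nn.LSTM(input_size=50, hidden_size=100, num_layers=2, batch_first=True)

batch = torch.randn(3, 7, 50)         # (batch_size, sequence_length, input_size), zero-padded
lengths = torch.tensor([7, 5, 2])     # true lengths, sorted in decreasing order

packed = pack_padded_sequence(batch, lengths, batch_first=True)
packed_output, _ = lstm(packed)       # the forward() above is invoked the same way
output, _ = pad_packed_sequence(packed_output, batch_first=True)
print(output.shape)                   # torch.Size([3, 7, 100])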
Example #2
    def forward(self,  # pylint: disable=arguments-differ
                inputs: PackedSequence,
                initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None):

        if not isinstance(inputs, PackedSequence):
            raise ConfigurationError('inputs must be PackedSequence but got %s' % (type(inputs)))

        sequence_tensor, batch_lengths = pad_packed_sequence(inputs, batch_first=True)
        batch_size = sequence_tensor.size()[0]
        length = sequence_tensor.size()[1]

        # Initialize the per-layer hidden and cell states.
        if initial_state is None:
            hidden = sequence_tensor.new_zeros(self.num_layers, batch_size, self.hidden_size)
            cell = sequence_tensor.new_zeros(self.num_layers, batch_size, self.n_chunk, self.chunk_size)
        else:
            hidden = initial_state[0].squeeze(0)
            cell = initial_state[1].squeeze(0)

        if self.training:
            for c in self.cells:
                c.sample_masks()

        final_hidden = []
        final_cell = []

        for l in range(len(self.cells)):
            curr_layer = [None] * length
            t_input = self.cells[l].ih(sequence_tensor)
            hx = hidden[l].squeeze(0)
            cx = cell[l].squeeze(0)

            for t in range(length):
                # (batch, dim) output for this timestep. Use fresh names so the
                # initial-state tensors ``hidden``/``cell`` stay intact for the
                # next layer's ``hidden[l]``/``cell[l]`` lookup.
                h_t, c_t = self.cells[l](None, hx, cx, transformed_input=t_input[:, t])
                hx, cx = h_t, c_t
                curr_layer[t] = h_t

            final_hidden.append(hx)
            final_cell.append(cx)
            # batch, length, dim
            sequence_tensor = torch.stack(curr_layer, dim=1)
            if l < len(self.cells) - 1:
                sequence_tensor = self.lockdrop(sequence_tensor, self.dropout)  # apply locked (variational) dropout between LSTM layers


        output = pack_padded_sequence(sequence_tensor, batch_lengths, batch_first=True)
        final_state = (torch.stack(final_hidden), torch.stack(final_cell))

        return output, final_state
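The layer-to-layer dropout above goes through self.lockdrop, which is not shown in this snippet. Below is a minimal sketch of such a module, assuming the usual locked ("variational") dropout semantics: one mask is sampled per sequence and reused at every timestep. The repository's actual implementation may differ in details such as the expected tensor layout.

import torch
from torch import nn

class LockedDropout(nn.Module):
    # Sketch of locked / variational dropout: sample one mask per (batch, feature)
    # pair, broadcast it over the time dimension, and scale by 1 / (1 - dropout)
    # so the expected activation is unchanged.
    def forward(self, x: torch.Tensor, dropout: float = 0.5) -> torch.Tensor:
        if not self.training or dropout == 0.0:
            return x
        # x is (batch, length, dim) here, matching torch.stack(curr_layer, dim=1) above.
        mask = x.new_empty(x.size(0), 1, x.size(2)).bernoulli_(1 - dropout)
        return x * mask / (1 - dropout)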
Example #3
    def forward(self, input: PackedSequence):

        # A PackedSequence holds a flat (total_timesteps, dim) data tensor laid out
        # timestep-major, plus ``batch_sizes`` giving how many sequences are still
        # active at each timestep.
        input, batch_sizes = input

        seq_len = batch_sizes.size()[0]
        max_batch_size = batch_sizes[0]

        # Flat output buffer mirroring the packed layout of the input.
        output = input.new_zeros(input.size(0), self.hidden_size)

        hidden_state = input.new_zeros(max_batch_size, self.hidden_size)
        cell_state = input.new_zeros(max_batch_size, self.hidden_size)

        recurrent_mask = get_dropout_mask(
            self.recurrent_dropout_prob,
            hidden_state) if self.training else None

        # ``cumsum_sizes[t]`` is one past the last row of timestep ``t`` in the flat
        # data tensor, so [begin, end) below selects exactly that timestep's rows.
        cumsum_sizes = torch.cumsum(batch_sizes, dim=0)
        for timestep in range(seq_len):
            timestep = timestep if self.go_forward else seq_len - timestep - 1
            len_t = batch_sizes[timestep]
            begin, end = (cumsum_sizes[timestep] - len_t,
                          cumsum_sizes[timestep])

            input_t = input[begin:end]
            hidden_t, cell_t = self.cell(
                input_t, (hidden_state[0:len_t], cell_state[0:len_t]))

            if self.training:
                hidden_t = hidden_t * recurrent_mask[:len_t]

            output[begin:end] = hidden_t
            # Clone before the in-place slice update so autograd tracks a new
            # version of the state tensors instead of an in-place modification.
            hidden_state = hidden_state.clone()
            cell_state = cell_state.clone()
            hidden_state[0:batch_sizes[timestep]] = hidden_t
            cell_state[0:batch_sizes[timestep]] = cell_t

        return PackedSequence(output, batch_sizes), (hidden_state, cell_state)
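get_dropout_mask above is an external helper (AllenNLP ships one under this name). A plausible version is sketched below, assuming the same scaled-Bernoulli semantics as the bernoulli_(1 - p).div_(1 - p) trick in the first example; the argument names are illustrative.

import torch

def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.Tensor) -> torch.Tensor:
    # Keep each unit with probability (1 - p) and scale survivors by 1 / (1 - p),
    # so the expected value of the masked activations is unchanged.
    binary_mask = torch.rand_like(tensor_for_masking) > dropout_probability
    return binary_mask.to(tensor_for_masking.dtype) / (1.0 - dropout_probability)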