Example no. 1
    def forward(self, x: torch.autograd.Variable):
        # x's shape must be [b, self.in_size]
        assert x.size(1) == self.in_size

        # (b, in_size)

        x = x.view(x.size(0), self.in_size, 1, 1)
        x = self.deconv(x)

        # (b, out_channels, 128, 128)

        return x
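
For context, here is a minimal, hypothetical sketch of the kind of module this forward() could belong to. The layer sizes and the class name are assumptions, not taken from the source, and the sketch is written against current PyTorch (where Variable is merged into Tensor); it only illustrates the reshape-then-deconv pattern that maps a latent vector of size in_size to a (b, out_channels, 128, 128) output.

import torch
from torch import nn


class LatentToImage(nn.Module):
    """Hypothetical generator: latent vector -> (b, out_channels, 128, 128) output."""

    def __init__(self, in_size: int = 100, out_channels: int = 3):
        super().__init__()
        self.in_size = in_size
        self.deconv = nn.Sequential(
            # (b, in_size, 1, 1) -> (b, 512, 4, 4)
            nn.ConvTranspose2d(in_size, 512, kernel_size=4, stride=1, padding=0),
            nn.ReLU(inplace=True),
            # each remaining layer doubles the resolution: 4 -> 8 -> 16 -> 32 -> 64 -> 128
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, out_channels, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        assert x.size(1) == self.in_size            # (b, in_size)
        x = x.view(x.size(0), self.in_size, 1, 1)   # (b, in_size, 1, 1)
        return self.deconv(x)                       # (b, out_channels, 128, 128)


print(LatentToImage()(torch.randn(2, 100)).shape)   # torch.Size([2, 3, 128, 128])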
Example no. 2
    def forward(self,
                encoder_outputs: torch.autograd.Variable,
                hidden_state: torch.autograd.Variable,
                targets: torch.autograd.Variable = None,
                max_length: int = None) -> tuple:
        """
        Forward step of the attentional decoder unit. If the targets parameter is not None, teacher forcing
        is used: during decoding, the ground-truth word from time step t-1 is fed to the decoder as its input
        at time step t. If targets is None, decoding is free-running: the input of the recurrent unit at time
        step t is the word predicted at time step t-1.

        :param targets:
            Variable, (batch_size, sequence_length) a batch of word ids.

        :param max_length:
            int, maximum length of the decoded sequence. If None, the decoder's configured max_length
            value will be used. This parameter has no effect if targets is provided.

        :param encoder_outputs:
            Variable, with size of (batch_size, sequence_length, hidden_size).

        :param hidden_state:
            Variable, (num_layers * directions, batch_size, hidden_size) initial hidden state.

        :return outputs:
            dict, containing the decoded word ids (symbols: ndarray) and the attention weights over the
            input sequence (alignment_weights: ndarray).

        :return predictions:
            list, the decoder output of each time step, which can be used to compute the loss.
        """
        batch_size = encoder_outputs.size(0)
        input_sequence_length = encoder_outputs.size(1)

        if targets is not None:

            predictions = self._forced_decode(
                targets=targets,
                batch_size=batch_size,
                hidden_state=hidden_state,
                encoder_outputs=encoder_outputs,
                input_sequence_length=input_sequence_length)

        else:

            predictions = self._predictive_decode(
                max_length=max_length,
                batch_size=batch_size,
                hidden_state=hidden_state,
                encoder_outputs=encoder_outputs,
                input_sequence_length=input_sequence_length)

        return self._outputs, predictions
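
The two decoding modes described in the docstring can be illustrated with a self-contained toy decoder. This is only a sketch built around nn.GRUCell; none of the names below come from the source. Under teacher forcing the input at each step comes from the target sequence, while in free-running mode the decoder feeds back its own argmax prediction.

import torch
from torch import nn

torch.manual_seed(0)
vocab_size, hidden_size, batch_size = 12, 16, 2
embedding = nn.Embedding(vocab_size, hidden_size)
cell = nn.GRUCell(hidden_size, hidden_size)
out_proj = nn.Linear(hidden_size, vocab_size)


def decode(hidden, targets=None, max_length=5, sos_id=1):
    """Returns a list of per-step logits; uses teacher forcing when targets is given."""
    forced_inputs = targets[:, :-1] if targets is not None else None   # <SOS> w1 ... w_{n-1}
    steps = forced_inputs.size(1) if targets is not None else max_length
    step_input = torch.full((batch_size,), sos_id, dtype=torch.long)   # free-running starts from <SOS>
    predictions = []
    for t in range(steps):
        if forced_inputs is not None:
            step_input = forced_inputs[:, t]          # teacher forcing: ground-truth word at step t
        hidden = cell(embedding(step_input), hidden)
        logits = out_proj(hidden)
        predictions.append(logits)
        step_input = logits.argmax(dim=-1)            # free-running: feed back the prediction
    return predictions


h0 = torch.zeros(batch_size, hidden_size)
targets = torch.randint(0, vocab_size, (batch_size, 6))
print(len(decode(h0, targets=targets)), len(decode(h0)))   # 5 5

The implementation shown in this listing delegates these two branches to _forced_decode (see the later examples) and _predictive_decode.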
Example no. 3
def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.autograd.Variable):
    """
    Computes and returns an element-wise dropout mask for a given tensor, where
    each element in the mask is dropped out with probability dropout_probability.
    Note that the mask is NOT applied to the tensor - the tensor is passed to retain
    the correct CUDA tensor type for the mask.

    Parameters
    ----------
    dropout_probability : float, required.
        Probability of dropping a dimension of the input.
    tensor_for_masking : torch.autograd.Variable, required.
        The tensor whose size and CUDA type the returned mask should match.

    Returns
    -------
    A torch.FloatTensor consisting of the binary mask scaled by 1 / (1 - dropout_probability).
    This scaling ensures that the expected value of the masked tensor matches that of the
    original tensor.
    """
    binary_mask = tensor_for_masking.clone()
    binary_mask.data.copy_(torch.rand(tensor_for_masking.size()) > dropout_probability)
    # Scale mask by 1/keep_prob to preserve output statistics.
    dropout_mask = binary_mask.float().div(1.0 - dropout_probability)
    return dropout_mask
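
A brief usage sketch (assumed, not from the source): the returned mask is multiplied onto activations by the caller, for example so the same mask can be reused across time steps, and the 1 / (1 - p) scaling keeps the mean of the masked tensor close to the mean of the original.

import torch

torch.manual_seed(0)
activations = torch.ones(4, 5)

mask = get_dropout_mask(0.5, activations)   # entries are either 0.0 or 1 / (1 - 0.5) = 2.0
dropped = activations * mask                # the mask is applied by the caller, not by the function
print(dropped)                              # each element is either 0.0 or 2.0

# Empirical check of the scaling: the mean of the masked tensor approaches the original mean.
samples = torch.stack([get_dropout_mask(0.5, activations) * activations for _ in range(10000)])
print(round(samples.mean().item(), 2))      # approximately 1.0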
Example no. 4
    def _forced_decode(self, targets: torch.autograd.Variable, batch_size: int,
                       hidden_state: torch.autograd.Variable,
                       encoder_outputs: torch.autograd.Variable,
                       input_sequence_length: int) -> list:
        """
        This method is primarily used during training, when target outputs are provided to the decoder.
        These target sequences start with an <SOS> token, which serves as the first input to the _decode
        function. During the decoding iterations the decoder's own predictions are not fed back as inputs;
        they are only collected as outputs to measure the loss, so the input at time step t is element t-1
        of the provided targets.

        :param targets:
            Variable, (batch_size, sequence_length) a batch of word ids.

        :param batch_size:
            int, size of the currently processed batch.

        :param hidden_state:
            Variable, (num_layers * directions, batch_size, hidden_size) initial hidden state.

        :param encoder_outputs:
            Variable, with size of (batch_size, sequence_length, hidden_size).

        :param input_sequence_length:
            int, length of the input (encoder-side) sequence.

        :return predictions:
            list, the decoder output of each time step.
        """
        output_sequence_length = targets.size(1) - 1

        inputs = targets[:, :-1].contiguous()
        embedded_inputs = self.embedding(inputs)

        predictions = []

        self._outputs['symbols'] = numpy.zeros(
            (batch_size, output_sequence_length), dtype='int')
        self._outputs['alignment_weights'] = numpy.zeros(
            (batch_size, output_sequence_length, input_sequence_length))

        for step in range(output_sequence_length):
            step_input = embedded_inputs[:, step, :]
            step_input = step_input.unsqueeze(1)
            step_output, hidden_state, attn_weights = self._decode(
                inputs=step_input,
                hidden_state=hidden_state,
                encoder_outputs=encoder_outputs,
                batch_size=batch_size,
                sequence_length=input_sequence_length)

            predictions.append(step_output.squeeze(1))
            self._outputs[
                'alignment_weights'][:, step, :] = attn_weights.data.squeeze(
                    1).cpu().numpy()
            self._outputs['symbols'][:, step] = step_output.topk(
                1)[1].data.squeeze(-1).squeeze(-1).cpu().numpy()

        return predictions
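
To connect this to training, here is a small sketch of how the returned predictions line up with the targets for the loss; the shapes and names are illustrative and not taken from the source. Since the inputs were targets[:, :-1] (starting with <SOS>), the prediction at step t is scored against targets[:, t + 1]. The sketch assumes the step outputs are raw logits; if they are log-probabilities, NLLLoss would replace CrossEntropyLoss.

import torch
from torch import nn

batch_size, output_sequence_length, vocab_size = 8, 9, 1000

# Stand-ins for the values produced by _forced_decode: one (batch, vocab) output per step,
# plus the full target batch including the leading <SOS> column.
predictions = [torch.randn(batch_size, vocab_size) for _ in range(output_sequence_length)]
targets = torch.randint(0, vocab_size, (batch_size, output_sequence_length + 1))

logits = torch.stack(predictions, dim=1)    # (batch, out_len, vocab)
labels = targets[:, 1:]                     # drop <SOS>: the label for step t is targets[:, t + 1]

loss = nn.CrossEntropyLoss()(logits.reshape(-1, vocab_size), labels.reshape(-1))
print(loss.item())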
Example no. 5
    def forward(self,
                encoder_outputs: torch.autograd.Variable,
                hidden_state: torch.autograd.Variable,
                targets: torch.autograd.Variable = None,
                max_length: int = None) -> tuple:
        """
        Forward step of the decoder unit. If the targets parameter is not None, teacher forcing is used:
        during decoding, the ground-truth word from time step t-1 is fed to the decoder as its input at
        time step t. If targets is None, decoding is free-running: the input of the recurrent unit at time
        step t is the word predicted at time step t-1.

        :param targets:
            Variable, (batch_size, sequence_length) a batch of word ids. If None, teacher forcing
            is not applied.

        :param max_length:
            int, maximum length of the decoded sequence. If None, the maximum length parameter from
            the configuration file will be used. This parameter has no effect if the targets parameter
            is provided, because in that case the length of the target sequence determines the
            decoding length.

        :param encoder_outputs:
            Variable, with size of (batch_size, sequence_length, hidden_size). This parameter
            is redundant for the standard decoder unit.

        :param hidden_state:
            Variable, (num_layers * directions, batch_size, hidden_size) initial hidden state.

        :return decoder_outputs:
            dict, containing the decoded word ids (symbols: ndarray).

        :return predictions:
            list, the decoder output of each time step, which can be used to compute the loss.
        """
        batch_size = encoder_outputs.size(0)

        if targets is not None:

            predictions = self._forced_decode(targets=targets,
                                              batch_size=batch_size,
                                              hidden_state=hidden_state,
                                              encoder_outputs=encoder_outputs,
                                              input_sequence_length=None)

        else:

            predictions = self._predictive_decode(
                max_length=max_length,
                batch_size=batch_size,
                hidden_state=hidden_state,
                encoder_outputs=encoder_outputs,
                input_sequence_length=None)

        return self._outputs, predictions
Example no. 6
    def _forced_decode(self,
                       targets: torch.autograd.Variable,
                       batch_size: int,
                       hidden_state: torch.autograd.Variable,
                       encoder_outputs: torch.autograd.Variable,
                       input_sequence_length: int = None) -> list:
        """
        This method is primarily used during training, when target outputs are provided to the decoder.
        These target sequences start with an <SOS> token, which serves as the first input to the _decode
        function. During the decoding iterations the decoder's own predictions are not fed back as inputs;
        they are only collected as outputs to measure the loss, so the input at time step t is element t-1
        of the provided targets.

        :param targets:
            Variable, (batch_size, sequence_length) a batch of word ids.

        :param batch_size:
            int, size of the currently processed batch.

        :param hidden_state:
            Variable, (num_layers * directions, batch_size, hidden_size) initial hidden state.

        :param encoder_outputs:
            Variable, with size of (batch_size, sequence_length, hidden_size).

        :param input_sequence_length:
            int, optional; unused by this method, it is required only by the attentional version.

        :return predictions:
            list, the decoder output of each time step.
        """
        output_sequence_length = targets.size(1) - 1

        self._outputs['symbols'] = numpy.zeros(
            (batch_size, output_sequence_length), dtype=numpy.int32)

        predictions = []

        inputs = targets[:, :-1].contiguous()
        embedded_inputs = self.embedding(inputs)

        outputs, hidden_state, _ = self._decode(inputs=embedded_inputs,
                                                hidden_state=hidden_state,
                                                encoder_outputs=None,
                                                batch_size=batch_size,
                                                sequence_length=None)

        for step in range(output_sequence_length):
            self._outputs['symbols'][:, step] = outputs[:, step, :].topk(
                1)[1].squeeze(-1).data.cpu().numpy()
            predictions.append(outputs[:, step, :])

        return predictions
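
Unlike the attentional variant, this _forced_decode pushes the whole embedded target prefix through the recurrent unit in a single call. The toy check below is only a sketch with an assumed nn.GRU, not the project's decoder; it shows why a single full-sequence call is equivalent to stepping one token at a time when no per-step attention is needed.

import torch
from torch import nn

torch.manual_seed(0)
batch_size, sequence_length, hidden_size = 2, 5, 8
rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
embedded_inputs = torch.randn(batch_size, sequence_length, hidden_size)
h0 = torch.zeros(1, batch_size, hidden_size)

# One call over the entire teacher-forced input sequence...
full_pass, _ = rnn(embedded_inputs, h0)

# ...matches decoding one time step at a time with the carried-over hidden state.
stepwise, hidden_state = [], h0
for step in range(sequence_length):
    step_output, hidden_state = rnn(embedded_inputs[:, step:step + 1, :], hidden_state)
    stepwise.append(step_output)

print(torch.allclose(full_pass, torch.cat(stepwise, dim=1), atol=1e-6))   # True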
Example no. 7
    def probabilities(self,
                      states: torch.autograd.Variable,
                      training: bool = True) -> np.ndarray:
        """
        Returns epsilon-greedy action probabilities for the given states: every action receives
        epsilon / num_actions, and the action with the highest Q-value receives the remaining
        1 - epsilon. During evaluation (training=False) epsilon is 0, so the greedy action gets
        probability 1. Note that only the probabilities for the first state of the batch are returned.
        """
        epsilon = self._epsilon if training else 0

        q_values = self._model.q_values(states)
        # noinspection PyArgumentList
        _, argmax = torch.max(q_values, dim=1)
        batch_size = states.size()[0]
        probabilities: torch.FloatTensor = torch.ones((batch_size, self._model.num_actions)) * \
            epsilon / self._model.num_actions
        arange = torch.arange(0, batch_size).type(torch.LongTensor)
        if self._model.is_cuda:
            probabilities = probabilities.cuda()
            arange = arange.cuda()
        probabilities[arange, argmax.data] += (1 - epsilon)
        if self._model.is_cuda:
            return probabilities.cpu().numpy()[0]
        else:
            return probabilities.numpy()[0]
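
A worked version of the epsilon-greedy construction above, with made-up Q-values: each action gets epsilon / num_actions, and the argmax of each row receives the remaining 1 - epsilon, so every row sums to 1. Since the method above returns index [0] of the result, it effectively yields the distribution for the first state in the batch.

import torch

epsilon, num_actions, batch_size = 0.1, 4, 2
q_values = torch.tensor([[0.2, 1.5, 0.3, 0.0],
                         [2.0, 0.1, 0.4, 0.9]])
_, argmax = torch.max(q_values, dim=1)                 # greedy action per state: [1, 0]

probabilities = torch.ones((batch_size, num_actions)) * epsilon / num_actions
probabilities[torch.arange(batch_size), argmax] += 1 - epsilon

print(probabilities)
# tensor([[0.0250, 0.9250, 0.0250, 0.0250],
#         [0.9250, 0.0250, 0.0250, 0.0250]])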