Exemple #1
0
    def memorize_exploration(self, states: torch.Tensor,
                             actions: torch.IntTensor,
                             rewards: torch.Tensor,
                             last_state_is_final=True):
        """
        Memorizes a whole exploration episode as a batch of transitions.

        The successive states are split into (state, next state) pairs: row i of
        ``states[:-1]`` is paired with row i + 1 of ``states``. A final-state indicator
        is appended as an extra last column of every next state (0 everywhere, except
        for the last transition where it holds ``last_state_is_final``). The resulting
        transitions are then handed to ``self.mem.memorize``.

        :param states: Successive states encountered. Should be a tensor of shape
                      (number_of_states, state_dim) holding at least two states.
        :param actions: Successive actions decided by the agent. Should be a tensor of shape
                       (number_of_states - 1, )
        :param rewards: (number_of_states - 1, )-sized 1D tensor indicating the rewards
                        for the episode.
        :param last_state_is_final: Indicates whether the last state in the exploration was final.
        :raises ValueError: if ``states`` holds fewer than two states, i.e. the
                            exploration contains no transition to memorize.
        """
        nb_transitions = states.size()[0] - 1
        if nb_transitions < 1:
            # Without this guard the indexing below fails with an opaque IndexError
            # (one state) or torch.zeros rejects a negative size (no state).
            raise ValueError("An exploration needs at least two states to form a transition")

        states = states.to(self.device)

        # Creates a tensor containing [0, 0, ..., 0, 1] to indicate that only the last state was final
        final_indicator = torch.zeros(nb_transitions, device=self.device)
        final_indicator[-1] = last_state_is_final
        # States reached at the end of each step, with the final indicator as last column
        next_states = torch.cat((states[1:], final_indicator.view(-1, 1)), dim=1)

        actions = actions.to(self.device)
        rewards = rewards.to(self.device)
        self.mem.memorize(states[:-1], actions, next_states, rewards)
    def memorize(self, states: torch.Tensor,
                 actions: torch.IntTensor,
                 next_states: torch.Tensor,
                 rewards: torch.Tensor):
        """
        Memorizes a batch of exploration transitions (quadruples s, a, ns, r).

        The tensors are moved to ``self.device`` and appended along dimension 0 to the
        ``state_mem``, ``action_mem``, ``nstate_mem`` and ``reward_mem`` attributes.
        On the first call (``self.need_init`` is true) those attributes are created
        from the batch. Actions are stored as ``torch.int64``.

        :param states: 2D tensor with one row per transition, holding the states in
                       which the actions were taken.
        :param actions: 1D tensor with one action per transition.
        :param next_states: 2D tensor with one row per transition, holding the states
                            reached after the actions.
        :param rewards: 1D tensor with one reward per transition.
        :raises ValueError: if ``states`` or ``next_states`` is not 2D, or ``actions``
                            is not 1D.

        NOTE(review): the previous docstring said ``states`` carries an extra last
        column flagging final states; ``memorize_exploration`` in this file appends
        that column to the *next* states instead -- confirm which tensor is expected
        to hold the flag.
        """
        # Check every rank on its own: comparing the sum of the ranks to 5 also lets
        # wrong combinations through (e.g. 1D states together with 2D actions).
        if states.dim() != 2 or actions.dim() != 1 or next_states.dim() != 2:
            raise ValueError("Wrong dimensions")

        # Make sure the tensors are on the right device. Tensor.to() returns a new
        # tensor instead of working in place, so the results must be kept.
        states = states.to(self.device)
        next_states = next_states.to(self.device)
        actions = actions.to(self.device, torch.int64)
        rewards = rewards.to(self.device)

        if self.need_init:
            # First batch: it becomes the memory as is.
            self.state_mem = states
            self.action_mem = actions
            self.nstate_mem = next_states
            self.reward_mem = rewards
            self.need_init = False
        else:
            # Later batches are appended after the transitions already stored.
            self.state_mem = torch.cat((self.state_mem, states), dim=0)
            self.action_mem = torch.cat((self.action_mem, actions))
            self.nstate_mem = torch.cat((self.nstate_mem, next_states), dim=0)
            self.reward_mem = torch.cat((self.reward_mem, rewards))
Exemple #3
0
def rna_loss(log_probs: torch.FloatTensor,
             labels: torch.IntTensor,
             frames_lengths: torch.IntTensor,
             labels_lengths: torch.IntTensor,
             average_frames: bool = False,
             reduction: Optional[AnyStr] = None,
             blank: int = 0) -> torch.Tensor:
    """Compute the CUDA-Warp Recurrent Neural Aligner loss.

    The arguments are validated with assertions, the costs are obtained from
    ``RNALoss.apply``, optionally divided by the number of frames, and finally
    reduced as requested.

    Args:
        log_probs (torch.FloatTensor): Input tensor with shape (N, T, U, V)
            where N is the minibatch size, T is the maximum number of
            input frames, U is the maximum number of output labels and V is
            the vocabulary of labels (including the blank).
        labels (torch.IntTensor): Tensor with shape (N, U-1) holding the
            reference labels of every sample in the minibatch.
        frames_lengths (torch.IntTensor): Tensor with shape (N,) holding the
            number of frames of every sample in the minibatch.
        labels_lengths (torch.IntTensor): Tensor with shape (N,) holding the
            transcription length of every sample in the minibatch.
        average_frames (bool, optional): When true, the costs are divided by
            ``frames_lengths``. Default: False.
        reduction (string, optional): One of "none", "mean" or "sum"; None
            behaves like "none". Default: None.
        blank (int, optional): label used to represent the blank symbol.
            Default: 0.

    Returns:
        The costs returned by ``RNALoss.apply`` (divided by the frame counts
        when ``average_frames`` is set), summed for "sum", averaged for "mean"
        and left unreduced otherwise.

    Raises:
        AssertionError: if an option has an unexpected type or value, or if
            one of the integer tensors requires gradients.
    """

    # Option checks.
    assert average_frames is None or isinstance(average_frames, bool)
    assert reduction is None or reduction in ("none", "mean", "sum")
    assert isinstance(blank, int)

    # None of the integer inputs may take part in differentiation.
    no_grad_inputs = (
        (labels, "labels does not require gradients"),
        (frames_lengths, "frames_lengths does not require gradients"),
        (labels_lengths, "labels_lengths does not require gradients"),
    )
    for tensor, message in no_grad_inputs:
        assert not tensor.requires_grad, message

    per_sample = RNALoss.apply(
        log_probs, labels, frames_lengths, labels_lengths, blank
    )

    if average_frames:
        # .to(log_probs) aligns dtype and device with log_probs before dividing.
        per_sample = per_sample / frames_lengths.to(log_probs)

    if reduction == "mean":
        return per_sample.mean()
    if reduction == "sum":
        return per_sample.sum()
    return per_sample
Exemple #4
0
def rnnt_loss(log_probs: torch.FloatTensor,
              labels: torch.IntTensor,
              frames_lengths: torch.IntTensor,
              labels_lengths: torch.IntTensor,
              average_frames: bool = False,
              reduction: Optional[AnyStr] = None,
              blank: int = 0,
              gather: bool = False) -> torch.Tensor:
    """Compute the CUDA-Warp RNN-Transducer loss.

    The arguments are validated with assertions, the costs are obtained from
    ``RNNTLoss.apply``, optionally divided by the number of frames, and finally
    reduced as requested.

    Args:
        log_probs (torch.FloatTensor): Input tensor with shape (N, T, U, V)
            where N is the minibatch size, T is the maximum number of
            input frames, U is the maximum number of output labels and V is
            the vocabulary of labels (including the blank).
        labels (torch.IntTensor): Tensor with shape (N, U-1) holding the
            reference labels of every sample in the minibatch.
        frames_lengths (torch.IntTensor): Tensor with shape (N,) holding the
            number of frames of every sample in the minibatch.
        labels_lengths (torch.IntTensor): Tensor with shape (N,) holding the
            transcription length of every sample in the minibatch.
        average_frames (bool, optional): When true, the costs are divided by
            ``frames_lengths``. Default: False.
        reduction (string, optional): One of "none", "mean" or "sum"; None
            behaves like "none". Default: None.
        blank (int, optional): label used to represent the blank symbol.
            Default: 0.
        gather (bool, optional): Reduce memory consumption. When set, only two
            entries of the last dimension of ``log_probs`` are kept (the blank
            one and the reference-label one) and ``RNNTLoss.apply`` is called
            with ``blank=-1``. Default: False.

    Returns:
        The costs returned by ``RNNTLoss.apply`` (divided by the frame counts
        when ``average_frames`` is set), summed for "sum", averaged for "mean"
        and left unreduced otherwise.

    Raises:
        AssertionError: if an option has an unexpected type or value, or if
            one of the integer tensors requires gradients.
    """

    # Option checks.
    assert average_frames is None or isinstance(average_frames, bool)
    assert reduction is None or reduction in ("none", "mean", "sum")
    assert isinstance(blank, int)
    assert isinstance(gather, bool)

    # None of the integer inputs may take part in differentiation.
    no_grad_inputs = (
        (labels, "labels does not require gradients"),
        (frames_lengths, "frames_lengths does not require gradients"),
        (labels_lengths, "labels_lengths does not require gradients"),
    )
    for tensor, message in no_grad_inputs:
        assert not tensor.requires_grad, message

    if gather:
        batch, frames, targets, _ = log_probs.size()

        # Slot 0 of the last axis selects the blank entry everywhere; slot 1
        # selects the reference label for the first targets - 1 positions and
        # stays on the blank entry for the last one.
        picked = torch.full([batch, frames, targets, 2], blank,
                            device=labels.device, dtype=torch.long)
        picked[:, :, :targets - 1, 1] = labels.unsqueeze(dim=1)

        log_probs = log_probs.gather(dim=3, index=picked)

        # Passed on to RNNTLoss.apply in place of the caller's blank index.
        blank = -1

    per_sample = RNNTLoss.apply(
        log_probs, labels, frames_lengths, labels_lengths, blank
    )

    if average_frames:
        # .to(log_probs) aligns dtype and device with log_probs before dividing.
        per_sample = per_sample / frames_lengths.to(log_probs)

    if reduction == "mean":
        return per_sample.mean()
    if reduction == "sum":
        return per_sample.sum()
    return per_sample