def memorize_exploration(self,
                         states: torch.Tensor,
                         actions: torch.IntTensor,
                         rewards: torch.Tensor,
                         last_state_is_final=True):
    """
    Memorizes a whole exploration process that ends with a single final
    reward. Should be used for processes for which the reward isn't known
    for every state-action couple, but only as a final score.
    :param states: Successive states encountered. Should be a tensor of
        shape (number_of_states, state_dim).
    :param actions: Successive actions decided by the agent. Should be a
        tensor of shape (number_of_states - 1,).
    :param rewards: (number_of_states - 1,)-sized 1D tensor containing the
        rewards for the episode.
    :param last_state_is_final: Indicates whether the last state in the
        exploration was final.
    """
    states = states.to(self.device)
    # Build a [0, 0, ..., 0, 1] indicator marking only the last state as final
    final_indicator = torch.zeros(states.size()[0] - 1, device=self.device)
    final_indicator[-1] = last_state_is_final
    # Next state of each transition, with the final indicator appended as an
    # extra last column
    next_states = torch.cat((states[1:], final_indicator.view(-1, 1)), dim=1)
    actions = actions.to(self.device)
    rewards = rewards.to(self.device)
    self.mem.memorize(states[:-1], actions, next_states, rewards)
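# Usage sketch for memorize_exploration (illustrative only: `agent` stands for
# an instance of the class above with state_dim == 4; none of these names are
# part of the original code):
#
#   states = torch.randn(10, 4)             # 10 successive states
#   actions = torch.randint(0, 2, (9,))     # one action per transition
#   rewards = torch.zeros(9)
#   rewards[-1] = 1.0                       # single reward at the end
#   agent.memorize_exploration(states, actions, rewards,
#                              last_state_is_final=True)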
def memorize(self,
             states: torch.Tensor,
             actions: torch.IntTensor,
             next_states: torch.Tensor,
             rewards: torch.Tensor):
    """
    Memorizes a batch of exploration transitions (quadruples s, a, ns, r).
    :param states: Successive states encountered. Should be a tensor of
        shape (number_of_states, state_dim).
    :param actions: Successive actions decided by the agent. Should be a
        tensor of shape (number_of_states,).
    :param next_states: (number_of_states, state_dim + 1)-shaped tensor of
        next states, whose last column is 1 if the corresponding state is
        final and 0 otherwise.
    :param rewards: (number_of_states,)-sized 1D tensor containing the
        rewards for the episode.
    """
    if len(states.size()) + len(actions.size()) + len(next_states.size()) != 5:
        raise ValueError("Wrong dimensions")

    # Make sure the tensors are on the right device (Tensor.to is not
    # in-place, so the results must be assigned back)
    states = states.to(self.device)
    next_states = next_states.to(self.device)
    actions = actions.to(self.device)
    rewards = rewards.to(self.device)

    if self.need_init:
        # First call: the incoming batch becomes the memory
        self.state_mem = states
        self.action_mem = actions.type(torch.int64)
        self.nstate_mem = next_states
        self.reward_mem = rewards
        self.need_init = False
    else:
        # Later calls: append the new transitions to the existing memory
        self.state_mem = torch.cat((self.state_mem, states), dim=0)
        self.action_mem = torch.cat((self.action_mem, actions.type(torch.int64)))
        self.nstate_mem = torch.cat((self.nstate_mem, next_states), dim=0)
        self.reward_mem = torch.cat((self.reward_mem, rewards))
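# Hypothetical companion helper, not part of the original code: because the
# memory stores parallel tensors, sampling one random index tensor yields
# aligned (s, a, ns, r) quadruples for minibatch training.
def sample(self, batch_size: int):
    """Returns a random minibatch of memorized transitions."""
    idx = torch.randint(0, self.state_mem.size(0), (batch_size,),
                        device=self.device)
    return (self.state_mem[idx], self.action_mem[idx],
            self.nstate_mem[idx], self.reward_mem[idx])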
def rna_loss(log_probs: torch.FloatTensor,
             labels: torch.IntTensor,
             frames_lengths: torch.IntTensor,
             labels_lengths: torch.IntTensor,
             average_frames: bool = False,
             reduction: Optional[str] = None,
             blank: int = 0) -> torch.Tensor:
    """The CUDA-Warp Recurrent Neural Aligner loss.

    Args:
        log_probs (torch.FloatTensor): Input tensor with shape (N, T, U, V)
            where N is the minibatch size, T is the maximum number of input
            frames, U is the maximum number of output labels and V is the
            vocabulary of labels (including the blank).
        labels (torch.IntTensor): Tensor with shape (N, U-1) representing the
            reference labels for all samples in the minibatch.
        frames_lengths (torch.IntTensor): Tensor with shape (N,) representing
            the number of frames for each sample in the minibatch.
        labels_lengths (torch.IntTensor): Tensor with shape (N,) representing
            the length of the transcription for each sample in the minibatch.
        average_frames (bool, optional): Specifies whether the loss of each
            sample should be divided by its number of frames. Default: False.
        reduction (string, optional): Specifies the type of reduction:
            "none", "mean" or "sum". Default: None.
        blank (int, optional): Label used to represent the blank symbol.
            Default: 0.
    """
    assert average_frames is None or isinstance(average_frames, bool)
    assert reduction is None or reduction in ("none", "mean", "sum")
    assert isinstance(blank, int)

    assert not labels.requires_grad, "labels must not require gradients"
    assert not frames_lengths.requires_grad, "frames_lengths must not require gradients"
    assert not labels_lengths.requires_grad, "labels_lengths must not require gradients"

    costs = RNALoss.apply(log_probs, labels, frames_lengths, labels_lengths, blank)

    if average_frames:
        costs = costs / frames_lengths.to(log_probs)

    if reduction == "sum":
        return costs.sum()
    elif reduction == "mean":
        return costs.mean()
    return costs
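# Usage sketch (a toy illustration, not part of the library: it assumes the
# warp-rna CUDA extension is installed, a GPU is available, and shapes follow
# the docstring above):
if __name__ == "__main__":
    N, T, U, V = 2, 8, 4, 5
    log_probs = torch.randn(N, T, U, V, device="cuda").log_softmax(dim=-1)
    log_probs.requires_grad_()
    labels = torch.randint(1, V, (N, U - 1), dtype=torch.int, device="cuda")
    frames_lengths = torch.full((N,), T, dtype=torch.int, device="cuda")
    labels_lengths = torch.full((N,), U - 1, dtype=torch.int, device="cuda")
    loss = rna_loss(log_probs, labels, frames_lengths, labels_lengths,
                    reduction="mean")
    loss.backward()  # gradients flow back through log_probs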
def rnnt_loss(log_probs: torch.FloatTensor,
              labels: torch.IntTensor,
              frames_lengths: torch.IntTensor,
              labels_lengths: torch.IntTensor,
              average_frames: bool = False,
              reduction: Optional[str] = None,
              blank: int = 0,
              gather: bool = False) -> torch.Tensor:
    """The CUDA-Warp RNN-Transducer loss.

    Args:
        log_probs (torch.FloatTensor): Input tensor with shape (N, T, U, V)
            where N is the minibatch size, T is the maximum number of input
            frames, U is the maximum number of output labels and V is the
            vocabulary of labels (including the blank).
        labels (torch.IntTensor): Tensor with shape (N, U-1) representing the
            reference labels for all samples in the minibatch.
        frames_lengths (torch.IntTensor): Tensor with shape (N,) representing
            the number of frames for each sample in the minibatch.
        labels_lengths (torch.IntTensor): Tensor with shape (N,) representing
            the length of the transcription for each sample in the minibatch.
        average_frames (bool, optional): Specifies whether the loss of each
            sample should be divided by its number of frames. Default: False.
        reduction (string, optional): Specifies the type of reduction:
            "none", "mean" or "sum". Default: None.
        blank (int, optional): Label used to represent the blank symbol.
            Default: 0.
        gather (bool, optional): Reduce memory consumption by gathering only
            the blank and reference-label log-probabilities before computing
            the loss. Default: False.
    """
    assert average_frames is None or isinstance(average_frames, bool)
    assert reduction is None or reduction in ("none", "mean", "sum")
    assert isinstance(blank, int)
    assert isinstance(gather, bool)

    assert not labels.requires_grad, "labels must not require gradients"
    assert not frames_lengths.requires_grad, "frames_lengths must not require gradients"
    assert not labels_lengths.requires_grad, "labels_lengths must not require gradients"

    if gather:
        # Keep, for every (t, u) position, only two entries of the V-sized
        # vocabulary axis: the blank log-probability (channel 0) and the
        # log-probability of the next reference label (channel 1)
        N, T, U, V = log_probs.size()
        index = torch.full([N, T, U, 2], blank,
                           device=labels.device, dtype=torch.long)
        index[:, :, :U-1, 1] = labels.unsqueeze(dim=1)
        log_probs = log_probs.gather(dim=3, index=index)
        # Signal to the loss kernel that log_probs are already gathered
        blank = -1

    costs = RNNTLoss.apply(log_probs, labels, frames_lengths, labels_lengths, blank)

    if average_frames:
        costs = costs / frames_lengths.to(log_probs)

    if reduction == "sum":
        return costs.sum()
    elif reduction == "mean":
        return costs.mean()
    return costs
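# Usage sketch for the memory-saving path (same caveats as the rna_loss
# example: a toy illustration assuming the warp-rnnt CUDA extension and a
# GPU). With gather=True the (N, T, U, V) activations are reduced to
# (N, T, U, 2) before the loss kernel runs, which matters most for large
# vocabularies V:
if __name__ == "__main__":
    N, T, U, V = 2, 8, 4, 100
    log_probs = torch.randn(N, T, U, V, device="cuda").log_softmax(dim=-1)
    log_probs.requires_grad_()
    labels = torch.randint(1, V, (N, U - 1), dtype=torch.int, device="cuda")
    frames_lengths = torch.full((N,), T, dtype=torch.int, device="cuda")
    labels_lengths = torch.full((N,), U - 1, dtype=torch.int, device="cuda")
    loss = rnnt_loss(log_probs, labels, frames_lengths, labels_lengths,
                     reduction="mean", gather=True)
    loss.backward()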