    def forward(self, x: Tensor) -> Tensor:
        # mel-spectrogram front end, then add a channel dimension for the 2-D convolutions
        x = self.melspectrogram(x)
        x = x.unsqueeze(1)
        x = self.norm_input(x)

        # convolutional blocks: ReLU convolutions followed by max-pooling and dropout
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.drop1(x)

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool(x)
        x = self.drop2(x)

        x = F.relu(self.conv5(x))
        x = self.maxpool(x)
        x = self.drop2(x)

        x = F.relu(self.conv6(x))
        x = self.maxpool(x)
        x = self.drop2(x)

        x = F.relu(self.conv7(x))
        x = self.maxpool(x)
        x = self.drop2(x)

        # classifier head on the flattened feature map
        x = self.fc(x.flatten(start_dim=1))
        x = self.fc_norm(x)
        return self.linear(x)
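# A minimal sketch (not the original code) of the __init__ that the forward pass
# above assumes. The attribute names match the forward method, but every channel
# count, kernel size, and dropout rate below is a hypothetical placeholder.
import torch.nn as nn
import torchaudio

class AudioCNNSketch(nn.Module):
    def __init__(self, n_mels: int = 64, n_classes: int = 10):
        super().__init__()
        self.melspectrogram = torchaudio.transforms.MelSpectrogram(n_mels=n_mels)
        self.norm_input = nn.BatchNorm2d(1)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.maxpool = nn.MaxPool2d(2)
        self.drop1 = nn.Dropout(0.2)
        self.drop2 = nn.Dropout(0.3)
        self.fc = nn.LazyLinear(256)        # lazy so the flattened size need not be hard-coded
        self.fc_norm = nn.BatchNorm1d(256)
        self.linear = nn.Linear(256, n_classes)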
    def forward(self,
                query: Tensor,
                key: Tensor,
                value: Tensor,
                mask: Tensor = None) -> Tensor:
        """
        @param query shape -> [batch_size, max_length, emb_size]
        @param key shape -> [batch_size, max_length, emb_size]
        @param value shape -> [batch_size, max_length, emb_size]
        @param mask shape -> [1, max_length, max_length]
        @return shape -> [batch_size, max_length, emb_size] (assuming the final projection maps back to emb_size)
        """
        if mask is not None:
            # 1, n, n -> 1, 1, n, n; n is max length of sentence
            mask = mask.unsqueeze(1)
        batch_size = query.size(0)

        # project q/k/v and split into heads: (batch, head_count, max_length, model_k_dim)
        query, key, value = [
            linear_f(x).view(batch_size, -1, self.head_count,
                             self.model_k_dim).transpose(1, 2)
            for linear_f, x in zip(self.linears, (query, key, value))
        ]
        # do attention
        x, self.attn = attention(query, key, value, mask, self.dropout)
        # concatenate heads back to (batch, max_length, head_count * model_k_dim)
        x = x.transpose(1, 2).contiguous().view(
            batch_size, -1, self.head_count * self.model_k_dim)
        return self.linears[-1](x)
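# A minimal sketch (under assumptions) of the scaled dot-product `attention` helper
# called above; the real implementation is not shown in this snippet. It returns the
# attended values together with the attention weights, matching
# `x, self.attn = attention(query, key, value, mask, self.dropout)`.
import math
from typing import Optional, Tuple

import torch
import torch.nn as nn
from torch import Tensor

def attention(query: Tensor, key: Tensor, value: Tensor,
              mask: Optional[Tensor] = None,
              dropout: Optional[nn.Dropout] = None) -> Tuple[Tensor, Tensor]:
    d_k = query.size(-1)
    # (batch, heads, max_len, d_k) @ (batch, heads, d_k, max_len) -> (batch, heads, max_len, max_len)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    weights = scores.softmax(dim=-1)
    if dropout is not None:
        weights = dropout(weights)
    # weighted sum of values, plus the weights themselves (stored as self.attn by the caller)
    return torch.matmul(weights, value), weights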
import torch
import torch.nn as nn
from torch import Tensor
from allennlp.nn import util as allennlp_nn_utils  # provides masked_softmax


def adaptive_scaling_loss(logits: Tensor, targets: Tensor, positive_idx: Tensor,
                          mask: Tensor = None, beta: float = 1.0, reduction='none',
                          weight_trainable: bool = False):
    """

    :param logits: (batch, num_label)
    :param targets: (batch, )
    :param positive_idx: (num_label)
        size is the number of all labels, positive_idx is 1, negative_idx is 0
    :param mask: (batch, )
    :param beta: float
    :param reduction:
        Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``.
        ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of elements in the output,
        ``'sum'``: the output will be summed.
    :param weight_trainable: bool
        False: stop the gradient at the beta weight (treat it as a constant).
        True: gradients from the beta weight are back-propagated to the other parameters.
    :return: the weighted loss; shape (batch,) for ``'none'``, otherwise a scalar
    """
    batch_size, num_label = logits.size()
    probs = allennlp_nn_utils.masked_softmax(logits, mask=mask)

    assert positive_idx.size(0) == num_label

    pos_label_mask = positive_idx.unsqueeze(0).expand(batch_size, num_label).to(logits.device)
    neg_label_mask = 1 - pos_label_mask

    targets_index = targets.unsqueeze(-1)

    # soft true-positive / true-negative mass: gold-label probability summed over
    # examples whose gold label is positive (tp) or negative (tn)
    tp = torch.sum(torch.gather(probs * pos_label_mask, 1, targets_index))
    tn = torch.sum(torch.gather(probs * neg_label_mask, 1, targets_index))

    p_vector = torch.gather(pos_label_mask, 1, targets_index).squeeze(-1).float()
    n_vector = torch.gather(neg_label_mask, 1, targets_index).squeeze(-1).float()
    p_sum = torch.sum(p_vector)
    n_sum = torch.sum(n_vector)
    # adaptive weight: negatives are scaled by tp / (beta^2 * p_sum + n_sum - tn), positives keep weight 1
    weight_beta = tp / (beta * beta * p_sum + n_sum - tn)
    weight_beta = n_vector * weight_beta + p_vector

    if not weight_trainable:
        weight_beta.detach_()

    loss = nn.functional.cross_entropy(input=logits, target=targets, reduction='none')

    if mask is None:
        weight_loss = loss * weight_beta
    else:
        weight_loss = loss * weight_beta * mask

    if reduction == 'sum':
        return torch.sum(weight_loss)
    elif reduction == 'mean':
        if mask is None:
            return torch.mean(weight_loss)
        else:
            return torch.sum(weight_loss) / (torch.sum(mask) + 1e-13)
    elif reduction == 'none':
        return weight_loss
    else:
        raise NotImplementedError('reduction %s in ``adaptive_scaling_loss`` is not Implemented' % reduction)
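# Hypothetical usage sketch for adaptive_scaling_loss; shapes follow the docstring,
# and all numbers below are made-up placeholders rather than values from the original code.
import torch

batch_size, num_label = 4, 5
logits = torch.randn(batch_size, num_label)
targets = torch.tensor([0, 2, 1, 0])            # gold label per example
positive_idx = torch.tensor([0, 1, 1, 1, 1])    # label 0 is the negative/NIL class
loss = adaptive_scaling_loss(logits, targets, positive_idx, beta=1.0, reduction='mean')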
    def calc_loss(self, q_values: Tensor, target_q_values: Tensor,
                  actions: Tensor, rewards: Tensor, done_mask: Tensor,
                  state: Tensor, next_state: Tensor) -> Tensor:
        """
        Calculate the MSE loss of this step.
        The loss for an example is defined as:
            Q_samp(s) = r if done
                        = r + gamma * max_a' Q_target(s', a') otherwise
            loss = (Q_samp(s) - Q(s, a))^2

        Args:
            q_values: (torch tensor) shape = (batch_size, num_actions)
                The Q-values that your current network estimates (i.e. Q(s, a') for all a')
            target_q_values: (torch tensor) shape = (batch_size, num_actions)
                The Target Q-values that your target network estimates (i.e. Q_target(s', a') for all a')
            actions: (torch tensor) shape = (batch_size,)
                The actions that you actually took at each step (i.e. a)
            rewards: (torch tensor) shape = (batch_size,)
                The rewards that you actually got at each step (i.e. r)
            done_mask: (torch tensor) shape = (batch_size,)
                A boolean mask of examples where we reached the terminal state
            state: (torch tensor)
                The current states s
            next_state: (torch tensor)
                The next states s'

        Hint:
            You may find the following functions useful
                - torch.max
                - torch.sum
                - torch.nn.functional.one_hot
                - torch.nn.functional.mse_loss
            You can convert `done_mask` to 0/1 values (0 = not done, 1 = done) with `.type(...)`,
            as done below.

            To extract Q(s, a) for a specific action "a" you can use torch.sum together with
            torch.nn.functional.one_hot. Think about how (a standalone sketch of this trick
            follows the method).
        """
        # you may need this variable
        num_actions = self.env.action_space.n
        gamma = self.config.gamma
        done_mask = done_mask.type(torch.int)
        actions = actions.type(torch.int64)
        ##############################################################
        ##################### YOUR CODE HERE - 3-5 lines #############
        '''
        # This is the vanilla DQN loss; the active code below implements the Double DQN (DDQN) loss instead.
        best_target_q = torch.reshape(torch.max(target_q_values, dim=1, keepdim=True).values, (-1,))
        Q_samp = rewards + (1 - done_mask) * gamma * best_target_q
        Q_sa = torch.sum(q_values * torch.nn.functional.one_hot(actions, self.env.action_space.n), dim=1)
        loss = torch.nn.functional.mse_loss(Q_samp, Q_sa)'''
        # move all tensors to the (hard-coded) GPU device used by the networks
        state = state.to('cuda:0')
        next_state = next_state.to('cuda:0')
        actions = actions.to('cuda:0')
        rewards = rewards.to('cuda:0')
        done_mask = done_mask.to('cuda:0')

        # Q(s, a) from the online network for the actions actually taken
        actions = actions.unsqueeze(-1)
        state_action_vals = self.get_q_values(state, 'q_network').gather(1, actions)
        state_action_vals = state_action_vals.squeeze(-1)

        # Double DQN: select a' with the online network, evaluate it with the target network
        next_state_action = self.get_q_values(next_state, 'q_network').max(1)[1]
        next_state_action = next_state_action.unsqueeze(-1)
        next_state_vals = self.get_q_values(next_state, 'target').gather(1, next_state_action).squeeze(-1)

        # TD target: r + gamma * Q_target(s', argmax_a' Q(s', a')) for non-terminal transitions
        exp_sa_vals = next_state_vals.detach() * gamma * (1 - done_mask) + rewards
        loss = torch.nn.functional.mse_loss(state_action_vals, exp_sa_vals)

        ##############################################################
        ######################## END YOUR CODE #######################
        return loss
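# A standalone sketch of the one_hot hint in the docstring above: extracting Q(s, a)
# for the actions actually taken, using torch.sum + one_hot instead of gather.
# The toy numbers are illustrative placeholders only.
import torch
import torch.nn.functional as F

q_values = torch.tensor([[1.0, 2.0, 3.0],
                         [4.0, 5.0, 6.0]])   # (batch_size, num_actions)
actions = torch.tensor([2, 0])               # chosen action per example
q_sa = torch.sum(q_values * F.one_hot(actions, num_classes=3), dim=1)
# q_sa == tensor([3., 4.]), the same values as q_values.gather(1, actions.unsqueeze(-1)).squeeze(-1)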