def forward(self, x: Tensor) -> Tensor:
    # Waveform -> mel spectrogram, then add a channel axis for the 2-D convolutions.
    x = self.melspectrogram(x)
    x = x.unsqueeze(1)
    x = self.norm_input(x)
    # Convolutional feature extractor: ReLU conv blocks with max-pooling and dropout.
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = self.maxpool(x)
    x = self.drop1(x)
    x = F.relu(self.conv3(x))
    x = F.relu(self.conv4(x))
    x = self.maxpool(x)
    x = self.drop2(x)
    x = F.relu(self.conv5(x))
    x = self.maxpool(x)
    x = self.drop2(x)
    x = F.relu(self.conv6(x))
    x = self.maxpool(x)
    x = self.drop2(x)
    x = F.relu(self.conv7(x))
    x = self.maxpool(x)
    x = self.drop2(x)
    # Flatten the feature maps and classify.
    x = self.fc(x.flatten(start_dim=1))
    x = self.fc_norm(x)
    return self.linear(x)
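# A small, self-contained sketch of the input handling in the forward pass above:
# torchaudio's MelSpectrogram turns a (batch, samples) waveform into (batch, n_mels, frames),
# and unsqueeze(1) adds the channel axis the 2-D convolutions expect. The sample rate,
# n_mels, and waveform length below are assumptions for illustration, not the original settings.
import torch
import torchaudio

melspec = torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=64)
waveform = torch.randn(8, 16000)   # one second of audio per example (assumed)
spec = melspec(waveform)           # -> (8, 64, frames)
spec = spec.unsqueeze(1)           # -> (8, 1, 64, frames), ready for conv1
print(spec.shape)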
def forward(self, query: Tensor, key: Tensor, value: Tensor, mask: Tensor = None) -> Tensor:
    """
    @param query shape -> [batch_size, max_length, emb_size]
    @param key shape -> [batch_size, max_length, emb_size]
    @param value shape -> [batch_size, max_length, emb_size]
    @param mask shape -> [1, max_length, max_length]
    @return a tensor with shape -> [batch_size, max_length, head_count * model_k_dim]
    """
    if mask is not None:
        # 1, n, n -> 1, 1, n, n; n is the max length of the sentence
        mask = mask.unsqueeze(1)
    batch_size = query.size(0)

    # Project query/key/value and split into heads:
    # [batch, length, emb] -> [batch, head_count, length, model_k_dim]
    query, key, value = [
        linear_f(x).view(batch_size, -1, self.head_count, self.model_k_dim).transpose(1, 2)
        for linear_f, x in zip(self.linears, (query, key, value))
    ]

    # Apply attention per head.
    x, self.attn = attention(query, key, value, mask, self.dropout)

    # Concatenate the heads back together and apply the output projection.
    x = x.transpose(1, 2).contiguous().view(
        batch_size, -1, self.head_count * self.model_k_dim)
    return self.linears[-1](x)
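# The forward above delegates to a free function `attention(...)` that is not shown here.
# Below is a minimal sketch of the scaled dot-product attention it presumably implements
# (in the style of the Annotated Transformer); the exact signature and the -1e9 masking
# value are assumptions inferred from how it is called above.
import math
import torch
import torch.nn.functional as F
from torch import Tensor


def attention(query: Tensor, key: Tensor, value: Tensor,
              mask: Tensor = None, dropout=None):
    """query/key/value: (batch, heads, max_length, d_k); returns (context, attention weights)."""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        # Block out padded (or future) positions before the softmax.
        scores = scores.masked_fill(mask == 0, -1e9)
    attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        attn = dropout(attn)
    return torch.matmul(attn, value), attn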
def adaptive_scaling_loss(logits: Tensor,
                          targets: Tensor,
                          positive_idx: Tensor,
                          mask: Tensor = None,
                          beta: float = 1.0,
                          reduction: str = 'none',
                          weight_trainable: bool = False):
    """
    :param logits: (batch, num_label)
    :param targets: (batch, )
    :param positive_idx: (num_label, ) one entry per label; 1 marks a positive label, 0 a negative label
    :param mask: (batch, )
    :param beta: float
    :param reduction: Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``.
        ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of elements in the output,
        ``'sum'``: the output will be summed.
    :param weight_trainable: bool
        False: stop the gradient at the beta weight
        True: gradient from the beta weight is back-propagated to the other parameters
    :return: the weighted cross-entropy loss, reduced according to ``reduction``
    """
    batch_size, num_label = logits.size()
    probs = allennlp_nn_utils.masked_softmax(logits, mask=mask)

    assert positive_idx.size(0) == num_label
    pos_label_mask = positive_idx.unsqueeze(0).expand(batch_size, num_label).to(logits.device)
    neg_label_mask = 1 - pos_label_mask

    targets_index = targets.unsqueeze(-1)
    # Probability mass assigned to the gold labels, split into positive (tp) and negative (tn) parts.
    tp = torch.sum(torch.gather(probs * pos_label_mask, 1, targets_index))
    tn = torch.sum(torch.gather(probs * neg_label_mask, 1, targets_index))

    # Per-example indicators of whether the gold label is positive or negative.
    p_vector = torch.gather(pos_label_mask, 1, targets_index).squeeze(-1).float()
    n_vector = torch.gather(neg_label_mask, 1, targets_index).squeeze(-1).float()
    p_sum = torch.sum(p_vector)
    n_sum = torch.sum(n_vector)

    # Adaptive scaling weight: negative examples are re-weighted, positive examples keep weight 1.
    weight_beta = tp / (beta * beta * p_sum + n_sum - tn)
    weight_beta = n_vector * weight_beta + p_vector

    if not weight_trainable:
        weight_beta.detach_()

    loss = nn.functional.cross_entropy(input=logits, target=targets, reduction='none')

    if mask is None:
        weight_loss = loss * weight_beta
    else:
        weight_loss = loss * weight_beta * mask

    if reduction == 'sum':
        return torch.sum(weight_loss)
    elif reduction == 'mean':
        if mask is None:
            return torch.mean(weight_loss)
        else:
            return torch.sum(weight_loss) / (torch.sum(mask) + 1e-13)
    elif reduction == 'none':
        return weight_loss
    else:
        raise NotImplementedError('reduction %s in ``adaptive_scaling_loss`` is not implemented' % reduction)
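# A hypothetical call to adaptive_scaling_loss, just to make the expected tensor shapes
# concrete. It assumes the function above is importable along with its allennlp dependency.
# Treating label 0 as the negative (e.g. NONE) class is an assumption for illustration only.
import torch

batch_size, num_label = 4, 5
logits = torch.randn(batch_size, num_label)
targets = torch.tensor([0, 2, 3, 1])                 # gold label index per example
positive_idx = torch.tensor([0, 1, 1, 1, 1])         # 0 -> negative label, 1 -> positive label
mask = torch.ones(batch_size, dtype=torch.bool)      # all examples are valid
loss = adaptive_scaling_loss(logits, targets, positive_idx,
                             mask=mask, beta=1.0, reduction='mean')
print(loss.item())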
def calc_loss(self, q_values: Tensor, target_q_values: Tensor, actions: Tensor,
              rewards: Tensor, done_mask: Tensor, state: Tensor, next_state: Tensor) -> Tensor:
    """
    Calculate the MSE loss of this step.
    The loss for an example is defined as:
        Q_samp(s) = r                                      if done
                  = r + gamma * max_a' Q_target(s', a')    otherwise
        loss = (Q_samp(s) - Q(s, a))^2

    Args:
        q_values: (torch tensor) shape = (batch_size, num_actions)
            The Q-values that your current network estimates (i.e. Q(s, a') for all a')
        target_q_values: (torch tensor) shape = (batch_size, num_actions)
            The target Q-values that your target network estimates (i.e. Q_target(s', a') for all a')
        actions: (torch tensor) shape = (batch_size,)
            The actions that you actually took at each step (i.e. a)
        rewards: (torch tensor) shape = (batch_size,)
            The rewards that you actually got at each step (i.e. r)
        done_mask: (torch tensor) shape = (batch_size,)
            A boolean mask of examples where we reached the terminal state

    Hint:
        You may find the following functions useful
            - torch.max
            - torch.sum
            - torch.nn.functional.one_hot
            - torch.nn.functional.mse_loss
        You can convert `done_mask` to 0/1 values (0 = not done, 1 = done) using
        torch.type, as done below.
        To extract Q(a) for a specific "a" you can use torch.sum together with
        torch.nn.functional.one_hot. Think about how.
    """
    # you may need this variable
    num_actions = self.env.action_space.n
    gamma = self.config.gamma
    done_mask = done_mask.type(torch.int)
    actions = actions.type(torch.int64)
    ##############################################################
    ##################### YOUR CODE HERE - 3-5 lines #############
    '''
    # This is the vanilla DQN loss function. The uncommented code below is the DDQN loss function.
    best_target_q = torch.reshape(torch.max(target_q_values, dim=1, keepdim=True).values, (-1,))
    Q_samp = rewards + (1 - done_mask) * gamma * best_target_q
    Q_sa = torch.sum(q_values * torch.nn.functional.one_hot(actions, self.env.action_space.n), dim=1)
    loss = torch.nn.functional.mse_loss(Q_samp, Q_sa)
    '''
    # Move everything to the GPU (assumes CUDA device 0 is available).
    state = state.to('cuda:0')
    next_state = next_state.to('cuda:0')
    actions = actions.to('cuda:0')
    rewards = rewards.to('cuda:0')
    done_mask = done_mask.to('cuda:0')

    # Q(s, a) for the actions actually taken, from the online network.
    actions = actions.unsqueeze(-1)
    state_action_vals = self.get_q_values(state, 'q_network').gather(1, actions)
    state_action_vals = state_action_vals.squeeze(-1)

    # Double-DQN target: choose a' with the online network, evaluate it with the target network.
    next_state_action = self.get_q_values(next_state, 'q_network').max(1)[1]
    next_state_action = next_state_action.unsqueeze(-1)
    next_state_vals = self.get_q_values(next_state, 'target').gather(
        1, next_state_action).squeeze(-1)

    # Q_samp(s) = r + gamma * Q_target(s', a') for non-terminal transitions, r otherwise.
    exp_sa_vals = next_state_vals.detach() * gamma * (1 - done_mask) + rewards
    loss = torch.nn.functional.mse_loss(state_action_vals, exp_sa_vals)
    ##############################################################
    ######################## END YOUR CODE #######################
    return loss
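# A small, self-contained illustration (with random stand-in Q tables) of the difference
# between the vanilla DQN target in the commented-out block and the Double-DQN target
# computed above: DDQN picks the argmax action with the online network but reads its value
# from the target network. Shapes, gamma, and the tensors themselves are assumptions.
import torch

batch_size, num_actions, gamma = 4, 3, 0.99
online_q_next = torch.randn(batch_size, num_actions)   # stand-in for get_q_values(next_state, 'q_network')
target_q_next = torch.randn(batch_size, num_actions)   # stand-in for get_q_values(next_state, 'target')
rewards = torch.randn(batch_size)
done_mask = torch.tensor([0, 0, 1, 0])

# Vanilla DQN: value of the best next action according to the target network itself.
dqn_target = rewards + (1 - done_mask) * gamma * target_q_next.max(dim=1).values

# Double DQN: action chosen by the online network, evaluated by the target network.
best_actions = online_q_next.argmax(dim=1, keepdim=True)
ddqn_target = rewards + (1 - done_mask) * gamma * target_q_next.gather(1, best_actions).squeeze(1)

print(dqn_target, ddqn_target)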