def forward(self, input):
    """Combine the value and advantage heads into a single output.

    The input is first passed through ``scale_grad`` together with
    ``self._grad_scale`` (gradient scaling); both heads are then evaluated
    on that result.  The advantage output is centered by subtracting its
    mean over the last dimension, and the centered advantage is added to
    the value output.

    Args:
        input: Features fed to both heads (presumably a tensor; the exact
            shape is not fixed by this method).

    Returns:
        ``value + (advantage - mean(advantage))`` with the mean taken over
        the last dimension -- presumably the Q-value estimates.
    """
    scaled = scale_grad(input, self._grad_scale)
    adv = self.advantage(scaled)
    val = self.value(scaled)
    # keepdim=True keeps the reduced axis so the mean broadcasts back
    # against the advantage output.
    centered_adv = adv - adv.mean(dim=-1, keepdim=True)
    return val + centered_adv
def forward(self, input):
    """Combine the value and advantage heads, with a per-atom value output.

    The input is first passed through ``scale_grad`` together with
    ``self._grad_scale``; both heads are then evaluated on that result.
    The value output is reshaped to ``(-1, 1, self._n_atoms)``, the
    advantage output is centered by subtracting its mean over dimension 1,
    and the two are summed.

    Args:
        input: Features fed to both heads (presumably a tensor).

    Returns:
        ``value + (advantage - mean(advantage, dim=1))``.
    """
    scaled = scale_grad(input, self._grad_scale)
    adv = self.advantage(scaled)
    # The singleton middle axis lets the value broadcast across dim 1 of the
    # advantage (assumes advantage is (batch, actions, atoms) -- TODO confirm).
    val = self.value(scaled).view(-1, 1, self._n_atoms)
    centered_adv = adv - adv.mean(dim=1, keepdim=True)
    return val + centered_adv