def forward(self, x):
    '''
    Feedforward pass: shared body first, then the two output heads.

    `self.model_body` turns `x` into features. `self.v` and `self.adv` are each applied
    to those same features, and the two results are merged by
    `math_util.calc_q_value_logits`, whose output is returned unchanged.

    NOTE(review): the shapes of `x` and of the head outputs are not visible in this
    method; confirm against the class definition.
    '''
    features = self.model_body(x)
    value_head_out = self.v(features)
    advantage_head_out = self.adv(features)
    return math_util.calc_q_value_logits(value_head_out, advantage_head_out)
def forward(self, x):
    '''
    Feedforward pass: conv layers, flatten, optional fc layers, then the two output heads.

    The output of `self.conv_model` is reshaped to 2 dimensions, keeping dim 0 and
    collapsing everything else. If this instance has an `fc_model` attribute, the
    flattened features go through it as well. `self.v` and `self.adv` are then applied
    to the same features and merged by `math_util.calc_q_value_logits`.
    '''
    conv_out = self.conv_model(x)
    batch_size = conv_out.size(0)
    features = conv_out.view(batch_size, -1)  # (batch_size, everything else)
    # The fc stack is optional; it is only applied when the attribute exists.
    if hasattr(self, 'fc_model'):
        features = self.fc_model(features)
    value_head_out = self.v(features)
    advantage_head_out = self.adv(features)
    return math_util.calc_q_value_logits(value_head_out, advantage_head_out)
def test_calc_q_value_logits():
    '''Check `math_util.calc_q_value_logits` against hand-computed values for 3 rows x 2 actions.'''
    values = torch.tensor([[1.], [2.], [3.]])
    raw_adv = torch.tensor([[0., 1.], [1., 1.], [1., 0.]])
    # Each expected row equals value + advantage - mean(advantage of that row).
    expected = torch.tensor([[0.5, 1.5], [2.0, 2.0], [3.5, 2.5]])
    actual = math_util.calc_q_value_logits(values, raw_adv)
    assert torch.allclose(actual, expected)