def test_shaping_reward_reward(self):
    """Check agent_reward() against the mean of all reward components.

    A Reward is built from a two-element base tuple and a two-element
    shaping tuple; the value returned by agent_reward() is expected to
    be (almost) equal to the arithmetic mean of all four components.
    """
    base_components = (1, 2)
    shaping_components = (3, 4)
    reward = Reward(base_components, shaping_components)

    # Concatenate once so the sum and the count come from the same tuple.
    all_components = base_components + shaping_components
    expected_mean = sum(all_components) / len(all_components)

    self.assertAlmostEqual(expected_mean, reward.agent_reward())
def _store_reward(self, reward: rewards.Reward, sim: Simulation):
    """Record the values of `reward` on the simulation.

    The agent reward is written to `sim` under `self.last_agent_reward`
    and the assessment reward under `self.last_assessment_reward`.

    :param reward: object providing agent_reward() and assessment_reward()
    :param sim: the simulation that receives both values via item assignment
    """
    agent_value = reward.agent_reward()
    sim[self.last_agent_reward] = agent_value

    assessment_value = reward.assessment_reward()
    sim[self.last_assessment_reward] = assessment_value