def testRewardCheckerBatchSizeOne(self):
    # With batch_size=1, a reward_fn that returns a one-element array is
    # expected to go through step() and show up as a reward equal to 1.0.
    def constant_reward(*_):
        # Ignores every argument and always returns a length-1 array.
        return np.array([1.0])

    observation_spec = BoundedTensorSpec((2, 3), torch.int32, -10, 10)
    action_spec = BoundedTensorSpec((1, ), torch.int64)
    environment = RandomAlfEnvironment(
        observation_spec,
        action_spec,
        reward_fn=constant_reward,
        batch_size=1)

    # The private done flag is cleared before reset() is called; keep this
    # order when editing the test.
    environment._done = False
    environment.reset()

    step_result = environment.step(torch.tensor([0], dtype=torch.int64))
    self.assertEqual(step_result.reward, 1.0)
def testCustomRewardFn(self):
    # A custom reward_fn that returns one reward of 1.0 per batch entry is
    # expected to yield a step reward of 1.0 for each of those entries.
    batch_size = 3

    def ones_reward(*_):
        # Ignores every argument; returns an array of ones of length
        # batch_size.
        return np.ones(batch_size)

    observation_spec = BoundedTensorSpec((2, 3), torch.int32, -10, 10)
    action_spec = BoundedTensorSpec((1, ), torch.int64)
    environment = RandomAlfEnvironment(
        observation_spec,
        action_spec,
        reward_fn=ones_reward,
        batch_size=batch_size)

    # The private done flag is cleared before reset() is called; keep this
    # order when editing the test.
    environment._done = False
    environment.reset()

    batched_action = torch.ones(batch_size)
    step_result = environment.step(batched_action)

    expected_rewards = [1.0] * batch_size
    self.assertSequenceAlmostEqual(expected_rewards, step_result.reward)
def testRewardCheckerSizeMismatch(self):
    # A reward_fn that returns a single-element array while batch_size is 5
    # is expected to make step() raise ValueError.
    def single_reward(*_):
        # Ignores every argument and always returns a length-1 array.
        return np.array([1.0])

    observation_spec = BoundedTensorSpec((2, 3), torch.int32, -10, 10)
    action_spec = BoundedTensorSpec((1, ), torch.int64)
    environment = RandomAlfEnvironment(
        observation_spec,
        action_spec,
        reward_fn=single_reward,
        batch_size=5)

    # Here reset() runs first and the private done flag is cleared
    # afterwards; keep this order when editing the test.
    environment.reset()
    environment._done = False

    # Built outside the assertRaises block so only step() is under test.
    scalar_action = torch.tensor(0, dtype=torch.int64)
    with self.assertRaises(ValueError):
        environment.step(scalar_action)