def test_multiple_goals(self):
    """Run one full episode of a 1-vs-1 `MultiturnTask` and check the tallies.

    The episode is stepped with each action spec's `generate_value()` until
    the environment reports the last timestep. The test then asserts that:

    * the number of steps equals `time_limit / control_timestep`;
    * the initializer was called 7 times in total (6 after discarding the
      first call);
    * the summed per-player rewards are `+6` and `-6` respectively.
    """
    scorer = _ScoringInitializer()
    episode_limit = 1.0
    step_period = 0.025

    # Build the pieces separately, in the same order the nested call would
    # evaluate them.
    both_players = _home_team(1) + _away_team(1)
    pitch = soccer.Pitch((20, 15), field_box=True)  # disable throw-in.
    task = soccer.MultiturnTask(
        players=both_players,
        arena=pitch,
        initializer=scorer,
        control_timestep=step_period,
    )
    env = composer.Environment(task=task, time_limit=episode_limit)

    timestep = env.reset()
    steps_taken = 0
    totals = [np.zeros(spec.shape, spec.dtype) for spec in env.reward_spec()]

    while not timestep.last():
        actions = [spec.generate_value() for spec in env.action_spec()]
        timestep = env.step(actions)
        for total, step_reward in zip(totals, timestep.reward):
            # In-place add: mutates the array held in `totals`.
            total += step_reward
        steps_taken += 1

    self.assertEqual(steps_taken, episode_limit / step_period)

    goals = scorer.num_calls - 1  # discard initialization.
    self.assertEqual(goals, 6)

    expected_totals = [
        np.full((), sign * goals, np.float32) for sign in (1, -1)
    ]
    self.assertEqual(totals, expected_totals)
def _env(players, disable_walker_contacts=True, observables=None):
    """Wrap `players` in a `soccer.Task` and return a `composer.Environment`.

    Args:
      players: forwarded to `soccer.Task(players=...)`.
      disable_walker_contacts: forwarded to `soccer.Task`; defaults to True.
      observables: forwarded to `soccer.Task`; defaults to None.

    Returns:
      A `composer.Environment` built with `time_limit=1` whose task uses a
      `soccer.Pitch` constructed from `(20, 15)`.
    """
    pitch = soccer.Pitch((20, 15))
    task = soccer.Task(
        players=players,
        arena=pitch,
        observables=observables,
        disable_walker_contacts=disable_walker_contacts,
    )
    return composer.Environment(task=task, time_limit=1)
def _env(players, disable_walker_contacts=True, observables=None,
         random_state=42, **task_kwargs):
    """Wrap `players` in a `soccer.Task` and return a `composer.Environment`.

    Args:
      players: forwarded to `soccer.Task(players=...)`.
      disable_walker_contacts: forwarded to `soccer.Task`; defaults to True.
      observables: forwarded to `soccer.Task`; defaults to None.
      random_state: forwarded to `composer.Environment`; defaults to 42.
      **task_kwargs: any extra keyword arguments, passed through to
        `soccer.Task` unchanged.

    Returns:
      A `composer.Environment` built with `time_limit=1` whose task uses a
      `soccer.Pitch` constructed from `(20, 15)`.
    """
    pitch = soccer.Pitch((20, 15))
    task = soccer.Task(
        players=players,
        arena=pitch,
        observables=observables,
        disable_walker_contacts=disable_walker_contacts,
        **task_kwargs
    )
    return composer.Environment(
        task=task,
        random_state=random_state,
        time_limit=1,
    )