예제 #1
0
    def test_multiple_goals(self):
        """Run a multi-turn soccer episode to its end and check the reward totals.

        The environment is stepped with the values produced by each action
        spec's `generate_value()` until the episode reports `last()`. Per-step
        rewards are summed per player, and the totals are compared against
        the number of times the scoring initializer was invoked after the
        first (setup) call.
        """
        time_limit = 1.0
        control_timestep = 0.025
        initializer = _ScoringInitializer()

        # Presumably one home and one away player -- confirm against the
        # `_home_team` / `_away_team` helpers.
        players = _home_team(1) + _away_team(1)
        pitch = soccer.Pitch((20, 15), field_box=True)  # disable throw-in.
        task = soccer.MultiturnTask(
            players=players,
            arena=pitch,
            initializer=initializer,
            control_timestep=control_timestep)
        env = composer.Environment(task=task, time_limit=time_limit)

        timestep = env.reset()
        # One zero-initialised accumulator per entry of the reward spec.
        totals = [
            np.zeros(spec.shape, spec.dtype) for spec in env.reward_spec()
        ]
        step_count = 0
        while not timestep.last():
            actions = [spec.generate_value() for spec in env.action_spec()]
            timestep = env.step(actions)
            for total, step_reward in zip(totals, timestep.reward):
                # Augmented assignment updates the array stored in `totals`
                # in place; rebinding with `total = total + ...` would not.
                total += step_reward
            step_count += 1

        expected_steps = time_limit / control_timestep
        self.assertEqual(step_count, expected_steps)

        # The first initializer call is the initial setup, not a score.
        goals = initializer.num_calls - 1
        self.assertEqual(goals, 6)

        expected_totals = [
            np.full((), goals, np.float32),
            np.full((), -goals, np.float32),
        ]
        self.assertEqual(totals, expected_totals)
예제 #2
0
def _env(players, disable_walker_contacts=True, observables=None):
    """Build a `composer.Environment` around a `soccer.Task` for tests.

    Args:
        players: Forwarded to `soccer.Task` as `players`.
        disable_walker_contacts: Forwarded to `soccer.Task` unchanged.
        observables: Forwarded to `soccer.Task` unchanged.

    Returns:
        A `composer.Environment` created with `time_limit=1` whose task uses
        a `soccer.Pitch` constructed from `(20, 15)`.
    """
    pitch = soccer.Pitch((20, 15))
    task = soccer.Task(
        players=players,
        arena=pitch,
        observables=observables,
        disable_walker_contacts=disable_walker_contacts,
    )
    return composer.Environment(task=task, time_limit=1)
예제 #3
0
def _env(players, disable_walker_contacts=True, observables=None,
         random_state=42, **task_kwargs):
  """Build a seeded `composer.Environment` around a `soccer.Task` for tests.

  Args:
    players: Forwarded to `soccer.Task` as `players`.
    disable_walker_contacts: Forwarded to `soccer.Task` unchanged.
    observables: Forwarded to `soccer.Task` unchanged.
    random_state: Forwarded to `composer.Environment` as `random_state`.
    **task_kwargs: Extra keyword arguments passed through to `soccer.Task`.

  Returns:
    A `composer.Environment` created with `time_limit=1` whose task uses a
    `soccer.Pitch` constructed from `(20, 15)`.
  """
  pitch = soccer.Pitch((20, 15))
  task = soccer.Task(
      players=players,
      arena=pitch,
      observables=observables,
      disable_walker_contacts=disable_walker_contacts,
      **task_kwargs)
  return composer.Environment(
      task=task, random_state=random_state, time_limit=1)