Example #1
 def test_different_first_observation_if_different_seed(self):
     timestep_1 = soccer.load(team_size=2, random_state=1).reset()
     timestep_2 = soccer.load(team_size=2, random_state=2).reset()
     try:
         self.assertSameObservation(timestep_1.observation,
                                    timestep_2.observation)
     except AssertionError:
         pass
     else:
         self.fail("Observations are unexpectedly identical.")
Example #2
  def test_same_first_observation_if_same_seed(self, disable_walker_contacts):
   seed = 42
   timestep_1 = soccer.load(
       team_size=2,
       random_state=seed,
       disable_walker_contacts=disable_walker_contacts).reset()
   timestep_2 = soccer.load(
       team_size=2,
       random_state=seed,
       disable_walker_contacts=disable_walker_contacts).reset()
   self.assertSameObservation(timestep_1.observation, timestep_2.observation)
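Both tests above call an assertSameObservation helper that is not shown in these snippets. A minimal sketch of what such a helper might look like, assuming each timestep's observation is a sequence of per-player dicts of NumPy arrays (the body below is an assumption for illustration, not code from the original test suite):

 def assertSameObservation(self, observation_1, observation_2):
     # numpy is assumed to be imported as np at module level.
     # Both observations should contain one dict per player.
     self.assertEqual(len(observation_1), len(observation_2))
     for obs_1, obs_2 in zip(observation_1, observation_2):
         self.assertEqual(set(obs_1.keys()), set(obs_2.keys()))
         for key in obs_1:
             # Compare each named observation array element-wise.
             np.testing.assert_array_equal(obs_1[key], obs_2[key])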
Example #3
 def __init__(self, team_size, time_limit):
     """
     Create a object that bridges Soccre env from dm_control to Ray MultiAgentEnv interface
     :param team_size: how many agents are in one team
     :param time_limit: timestep of the env (1second = 40timestep)
     """
     self.team_size = team_size
     self.agent_num = 2 * team_size
     self.n = self.agent_num
     self.env = dm_soccer.load(team_size=team_size, time_limit=time_limit)
     # action space creation
     self.action_space = [
         spaces.Box(-1.0, 1.0, shape=[3], dtype=np.float32)
     ] * self.agent_num
     # observation space creation
     self.feature_space_length = 0
     for feature_name, feature_spec in self.env.observation_spec()[0].items():
         self.feature_space_length += int(np.prod(feature_spec.shape))
     self.observation_space = [
         spaces.Box(low=-np.inf,
                    high=np.inf,
                    shape=(self.feature_space_length, ),
                    dtype=np.float32)
     ] * self.n
     self.info = {}
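The constructor above is only part of the wrapper; the matching reset and step methods are not included in the example. A minimal sketch of how they might flatten each player's observation dict into the flat Box space defined above (method names and the list-per-agent return format are assumptions for illustration, not code from the original example):

 def _flatten_observation(self, observation):
     # Concatenate every feature array of one player into a single flat float32 vector.
     return np.concatenate(
         [np.asarray(v, dtype=np.float32).ravel() for v in observation.values()])

 def reset(self):
     timestep = self.env.reset()
     return [self._flatten_observation(o) for o in timestep.observation]

 def step(self, actions):
     timestep = self.env.step(actions)
     observations = [self._flatten_observation(o) for o in timestep.observation]
     rewards = list(timestep.reward) if timestep.reward is not None else [0.0] * self.n
     dones = [timestep.last()] * self.n
     return observations, rewards, dones, [self.info] * self.n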
Example #4
 def __init__(self, batch_size=None, **kwargs):
     # super().__init__(batch_size, **kwargs)
     self.base_env = dm_soccer.load(kwargs["team_size"],
                                    kwargs["time_limit"])
     act_spec = self.base_env.action_spec()
     self.n_agents = len(act_spec)
     self.observation_space = [Box(low=-10000, high=10000, shape=(154, ))
                               ] * self.n_agents
     self.action_space = [Box(low=-1, high=1, shape=(3, ))] * self.n_agents
Example #5
    def __init__(self, env_name, ALGO):
        """
        A base template for all environment wrappers.
        """
        from dm_control.locomotion import soccer as dm_soccer

        import gym
        self.env = dm_soccer.load(team_size=2, time_limit=10.)
        self.action_specs = self.env.action_spec()
        self.ALGO = ALGO
Example #6
  def test_load_env(self, team_size, disable_walker_contacts):
    env = soccer.load(team_size=team_size, time_limit=2.,
                      disable_walker_contacts=disable_walker_contacts)
    action_specs = env.action_spec()

    random_state = np.random.RandomState(0)
    time_step = env.reset()
    while not time_step.last():
      actions = []
      for action_spec in action_specs:
        action = random_state.uniform(
            action_spec.minimum, action_spec.maximum, size=action_spec.shape)
        actions.append(action)
      time_step = env.step(actions)

      for i in range(len(action_specs)):
        logging.info(
            "Player %d: reward = %s, discount = %s, observations = %s.", i,
            time_step.reward[i], time_step.discount, time_step.observation[i])
Example #7
 def __init__(self,
              team_size=2,
              time_limit=45,
              disable_walker_contacts=True,
              team_num=2,
              render_name="human"):
     self.team_size = team_size
     self.team_num = team_num
     self.env = dm_soccer.load(team_size=self.team_size,
                               time_limit=time_limit,
                               disable_walker_contacts=disable_walker_contacts)
     ac_sp_i = spaces.Box(low=-1.0, high=1.0, shape=(3, ), dtype=np.float32)
     ac_sp = spaces.Tuple(
         [spaces.Tuple(tuple([ac_sp_i] * self.team_size))] * self.team_num)
     self.action_space = ac_sp
     #print(self.action_space)
     self.observation_space = spaces.Tuple([NoneSpace(), NoneSpace()])
     self.timestep = None
     odict_sp = {}
     odict = self.env.observation_spec()
     for key in odict[0]:
         odict_sp[key] = spaces.Box(-np.inf,
                                    np.inf,
                                    shape=(int(np.prod(odict[0][key].shape)), ))
     self.observation_space = spaces.Tuple(
         [spaces.Tuple([spaces.Dict(odict_sp)] * self.team_size)] *
         self.team_num)
     # render
     render_mode_list = self.create_render_mode(render_name,
                                                show=False,
                                                return_pixel=True)
     if render_mode_list is not None:
         self.metadata['render.modes'] = list(render_mode_list.keys())
         self.viewer = {key: None for key in render_mode_list.keys()}
     else:
         self.metadata['render.modes'] = []
     self.render_mode_list = render_mode_list
     # set seed
     self._seed()
Example #8
def make_env():
    # Load the 2-vs-2 soccer environment with episodes of 10 seconds:
    dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
Example #9
 def test_same_first_observation_if_same_seed(self):
     seed = 42
     timestep_1 = soccer.load(team_size=2, random_state=seed).reset()
     timestep_2 = soccer.load(team_size=2, random_state=seed).reset()
     self.assertSameObservation(timestep_1.observation,
                                timestep_2.observation)
import numpy as np
from dm_control.locomotion import soccer as dm_soccer

# Load the 2-vs-2 soccer environment with episodes of 10 seconds:
env = dm_soccer.load(team_size=2, time_limit=10.)

# Retrieves action_specs for all 4 players.
action_specs = env.action_spec()

# Step through the environment for one episode with random actions.
time_step = env.reset()
while not time_step.last():
    actions = []
    for action_spec in action_specs:
        action = np.random.uniform(action_spec.minimum,
                                   action_spec.maximum,
                                   size=action_spec.shape)
        actions.append(action)
    time_step = env.step(actions)

    for i in range(len(action_specs)):
        print(
            "Player {}: reward = {}, discount = {}, observations = {}.".format(
                i, time_step.reward[i], time_step.discount,
                time_step.observation[i]))
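If pixel renderings are needed alongside the state observations above, the underlying MuJoCo physics can be rendered directly. A minimal sketch (the camera index below is an assumption; the soccer environment defines several cameras, so adjust camera_id as needed):

# Render one RGB frame of the current state.
pixels = env.physics.render(height=240, width=320, camera_id=0)
print(pixels.shape)  # expected: (240, 320, 3) uint8 array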