Example 1
def cmu_humanoid_run_gaps(random_state=None):
    """Requires a CMU humanoid to run down a corridor with gaps."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
    # platforms are uniformly randomized.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(.3, 2.5),
        gap_length=distributions.Uniform(.5, 1.25),
        corridor_width=10,
        corridor_length=100)

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(0.5, 0, 0),
                                         target_velocity=3.0,
                                         physics_timestep=0.005,
                                         control_timestep=0.03)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
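
# Usage sketch: roll out the gaps environment with uniformly random actions.
# This assumes the dm_control imports used by the examples (composer,
# cmu_humanoid, corr_arenas, corr_tasks, distributions) are in scope; the
# seed value is arbitrary.
import numpy as np

env = cmu_humanoid_run_gaps(random_state=42)
spec = env.action_spec()
timestep = env.reset()
while not timestep.last():
    action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
    timestep = env.step(action)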
Example 2
def load(team_size, time_limit=45., random_state=None):
  """Construct `team_size`-vs-`team_size` soccer environment.

  Args:
    team_size: Integer, the number of players per team. Must be between 1 and
      11.
    time_limit: Float, the maximum duration of each episode in seconds.
    random_state: (optional) an int seed or `np.random.RandomState` instance.

  Returns:
    A `composer.Environment` instance.

  Raises:
    ValueError: If `team_size` is not between 1 and 11.
  """
  if team_size < 1 or team_size > 11:
    raise ValueError(
        "Team size must be between 1 and 11 (received %d)." % team_size)

  return composer.Environment(
      task=Task(
          players=_make_players(team_size),
          arena=RandomizedPitch(
              min_size=(32, 24), max_size=(48, 36), keep_aspect_ratio=True),
      ),
      time_limit=time_limit,
      random_state=random_state)
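
# Usage sketch: the soccer environment is multi-agent, so `action_spec()`
# returns one spec per player and `step` expects a list of actions (the same
# pattern appears in the multi-goal test below).
env = load(team_size=2, random_state=0)
timestep = env.reset()
while not timestep.last():
    actions = [spec.generate_value() for spec in env.action_spec()]
    timestep = env.step(actions)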
Example 3
def cmu_humanoid_run_walls(random_state=None):
    """Requires a CMU humanoid to run down a corridor obstructed by walls."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Build a corridor-shaped arena that is obstructed by walls.
    arena = corr_arenas.WallsCorridor(wall_gap=4.,
                                      wall_width=distributions.Uniform(1, 7),
                                      wall_height=3.0,
                                      corridor_width=10,
                                      corridor_length=100,
                                      include_initial_padding=False)

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(0.5, 0, 0),
                                         target_velocity=3.0,
                                         physics_timestep=0.005,
                                         control_timestep=0.03)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example 4
def build_env(reward_type,
              ghost_offset=0,
              clip_name='CMU_016_22',
              start_step=0,
              force_magnitude=0,
              disable_observables=True,
              termination_error_threshold=1e10):
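    """Build a mocap tracking environment for a single reference clip."""
    # Note: the walker *class* (a factory), not an instance, is passed to the
    # task below; `MultiClipMocapTracking` instantiates the walker itself, as
    # in the `cmu_humanoid_tracking` example later in this file.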
    walker = cmu_humanoid.CMUHumanoidPositionControlledV2020
    arena = floors.Floor()
    task = tracking.MultiClipMocapTracking(
        walker=walker,
        arena=arena,
        ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
        dataset=types.ClipCollection(ids=[clip_name]),
        ref_steps=(1, 2, 3, 4, 5),
        start_step=start_step,
        max_steps=256,
        reward_type=reward_type,
        always_init_at_clip_start=True,
        termination_error_threshold=termination_error_threshold,
        ghost_offset=ghost_offset,
        force_magnitude=force_magnitude,
        disable_observables=disable_observables,
    )
    env = composer.Environment(
        task=task, random_state=np.random.RandomState(seed=FLAGS.seed))
    return env
Example 5
def load(team_size,
         time_limit=45.,
         random_state=None,
         disable_walker_contacts=False,
         walker_type=WalkerType.BOXHEAD):
    """Construct `team_size`-vs-`team_size` soccer environment.

  Args:
    team_size: Integer, the number of players per team. Must be between 1 and
      11.
    time_limit: Float, the maximum duration of each episode in seconds.
    random_state: (optional) an int seed or `np.random.RandomState` instance.
    disable_walker_contacts: (optional) if `True`, disable physical contacts
      between walkers.
    walker_type: the type of walker to instantiate in the environment.

  Returns:
    A `composer.Environment` instance.

  Raises:
    ValueError: If `team_size` is not between 1 and 11.
    ValueError: If `walker_type` is not recognized.
  """

    return composer.Environment(task=Task(
        players=_make_players(team_size, walker_type),
        arena=RandomizedPitch(min_size=(32, 24),
                              max_size=(48, 36),
                              keep_aspect_ratio=True),
        disable_walker_contacts=disable_walker_contacts),
                                time_limit=time_limit,
                                random_state=random_state)
Example 6
    def test_multiple_goals(self):
        initializer = _ScoringInitializer()
        time_limit = 1.0
        control_timestep = 0.025
        env = composer.Environment(
            task=soccer.MultiturnTask(
                players=_home_team(1) + _away_team(1),
                arena=soccer.Pitch((20, 15),
                                   field_box=True),  # disable throw-in.
                initializer=initializer,
                control_timestep=control_timestep),
            time_limit=time_limit)

        timestep = env.reset()
        num_steps = 0
        rewards = [np.zeros(s.shape, s.dtype) for s in env.reward_spec()]
        while not timestep.last():
            timestep = env.step(
                [spec.generate_value() for spec in env.action_spec()])
            for reward, r_t in zip(rewards, timestep.reward):
                reward += r_t
            num_steps += 1
        self.assertEqual(num_steps, time_limit / control_timestep)

        num_scores = initializer.num_calls - 1  # discard initialization.
        self.assertEqual(num_scores, 6)
        self.assertEqual(rewards, [
            np.full((), num_scores, np.float32),
            np.full((), -num_scores, np.float32)
        ])
Example 7
  def testActivation(self):
    target_radius = 0.6
    prop_radius = 0.1
    target_height = 1

    arena = floors.Floor()
    target = target_sphere.TargetSphere(radius=target_radius,
                                        height_above_ground=target_height)
    prop = primitive.Primitive(geom_type='sphere', size=[prop_radius])
    arena.attach(target)
    arena.add_free_entity(prop)

    task = composer.NullTask(arena)
    task.initialize_episode = (
        lambda physics, random_state: prop.set_pose(physics, [0, 0, 2]))

    env = composer.Environment(task)
    env.reset()

    max_activated_height = target_height + target_radius + prop_radius

    while env.physics.bind(prop.geom).xpos[2] > max_activated_height:
      self.assertFalse(target.activated)
      self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
      env.step([])

    while env.physics.bind(prop.geom).xpos[2] > 0.2:
      self.assertTrue(target.activated)
      self.assertEqual(env.physics.bind(target.material).rgba[-1], 0)
      env.step([])

    # Target should be reset when the environment is reset.
    env.reset()
    self.assertFalse(target.activated)
    self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
Example 8
def ant_run_walls():
    walker = ant.Ant()

    arena = corr_arenas.WallsCorridor(wall_gap=4.,
                                      wall_width=distributions.Uniform(1, 7),
                                      wall_height=3.0,
                                      corridor_width=10,
                                      corridor_length=250,
                                      include_initial_padding=False)

    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(0.5, 0, 0),
                                         walker_spawn_rotation=0,
                                         stand_height=0.2,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    # (Chongyi Zheng): redefine reward function
    # task.get_reward = _ant_get_reward.__get__(task, task.get_reward)

    return composer.Environment(time_limit=30,
                                task=task,
                                strip_singleton_obs_buffer_dim=True)
Example 9
def walker_run_gaps(random_state=None):
    walker = planar_walker.PlanarWalker()

    # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
    # platforms are uniformly randomized.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(1.25, 2.5),  # (0.3, 2.5)
        gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
        corridor_width=10,
        corridor_length=250)

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(1.0, 0, 0),
                                         stand_height=1.2,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    # (Chongyi Zheng): redefine reward function
    task.get_reward = _walker_get_reward.__get__(task, type(task))

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
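
# Aside: `function.__get__(instance)` produces a bound method, so an
# equivalent and arguably clearer spelling of the reward override above is:
#
#   import types
#   task.get_reward = types.MethodType(_walker_get_reward, task)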
Example 10
  def _setup_basic_gtt_task(self, num_targets=1, reward_scale=1.0):
   walker = walkers.Ant()
   text_maze = arenas.padded_room.PaddedRoom(
       room_size=8, num_objects=2, pad_with_walls=True)
   maze_arena = arenas.MazeWithTargets(maze=text_maze)
   targets = []
   for _ in range(num_targets):
     targets.append(
         props.PositionDetector(
             pos=[0, 0, 0.5],
             size=[0.5, 0.5, 0.5],
             inverted=False,
             visible=True))
   test_predicates = [predicates.MoveWalkerToRandomTarget(walker, targets)]
   self._task = predicate_task.PredicateTask(
       walker=walker,
       maze_arena=maze_arena,
       predicates=test_predicates,
       targets=targets,
       randomize_num_predicates=False,
       reward_scale=reward_scale,
       terminating_reward_bonus=2.0,
       )
   random_state = np.random.RandomState(12345)
   self._env = composer.Environment(self._task, random_state=random_state)
   self._walker = walker
   self._targets = targets
Example 11
  def test_error_too_few_targets(self):
   walker = walkers.Ant()
   num_targets = 5
   text_maze = arenas.padded_room.PaddedRoom(
       room_size=8, num_objects=2, pad_with_walls=True)
   maze_arena = arenas.MazeWithTargets(maze=text_maze)
   targets = []
   for _ in range(num_targets):
     targets.append(
         props.PositionDetector(
             pos=[0, 0, 0.5],
             size=[0.5, 0.5, 0.5],
             inverted=False,
             visible=True))
   test_predicates = [predicates.MoveWalkerToRandomTarget(walker, targets)]
   task = predicate_task.PredicateTask(
       walker=walker,
       maze_arena=maze_arena,
       predicates=test_predicates,
       targets=targets,
       randomize_num_predicates=False,
       reward_scale=1.0,
       terminating_reward_bonus=2.0,
   )
   random_state = np.random.RandomState(12345)
   env = composer.Environment(task, random_state=random_state)
   with self.assertRaisesWithLiteralMatch(
       RuntimeError, "The generated maze does not contain enough target "
       "positions for the requested number of props (0) and targets (5): "
       "got 2."
   ):
     env.reset()
Example 12
def jumping_ball_run_gaps(random_state=None):
    walker = jumping_ball.JumpingBallWithHead()

    # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
    # platforms are uniformly randomized.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(1.0, 2.5),  # (0.3, 2.5)
        gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
        corridor_width=10,
        corridor_length=250)

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(1.0, 0, 0),
                                         target_velocity=3.0,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example 13
def _build_rodent_corridor_gaps():
    """Build environment where a rodent runs over gaps."""
    walker = walkers.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    platform_length = distributions.Uniform(low=0.4, high=0.8)
    gap_length = distributions.Uniform(low=0.05, high=0.2)
    arena = arenas.corridors.GapsCorridor(corridor_width=2,
                                          platform_length=platform_length,
                                          gap_length=gap_length,
                                          corridor_length=40,
                                          aesthetic='outdoor_natural')

    rodent_task = tasks.corridors.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        target_velocity=1.0,
        contact_termination=False,
        terminate_at_height=-0.3,
        physics_timestep=0.001,
        control_timestep=0.02)
    raw_env = composer.Environment(time_limit=30,
                                   task=rodent_task,
                                   strip_singleton_obs_buffer_dim=True)

    return raw_env
Example 14
def _build_rodent_two_touch_env():
    """Build environment where a rodent touches targets."""
    walker = walkers.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    arena_floor = arenas.floors.Floor(size=(10., 10.),
                                      aesthetic='outdoor_natural')
    task_reach = tasks.reach.TwoTouch(
        walker=walker,
        arena=arena_floor,
        target_builders=[
            functools.partial(props.target_sphere.TargetSphereTwoTouch,
                              radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=0.001,
        control_timestep=0.02)

    raw_env = composer.Environment(time_limit=30,
                                   task=task_reach,
                                   strip_singleton_obs_buffer_dim=True)

    return raw_env
Example 15
def rodent_run_gaps(random_state=None):
    """Requires a rodent to run down a corridor with gaps."""

    # Build a position-controlled rodent walker.
    walker = rodent.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
    # platforms are uniformly randomized.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(0.4, 0.8),
        gap_length=distributions.Uniform(0.05, 0.2),
        corridor_width=2,
        corridor_length=40,
        aesthetic='outdoor_natural')

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(5, 0, 0),
                                         walker_spawn_rotation=0,
                                         target_velocity=1.0,
                                         contact_termination=False,
                                         terminate_at_height=-0.3,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example 16
def rodent_two_touch(random_state=None):
    """Requires a rodent to tap an orb, wait an interval, and tap it again."""

    # Build a position-controlled rodent walker.
    walker = rodent.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')

    task = reach.TwoTouch(
        walker=walker,
        arena=arena,
        target_builders=[
            functools.partial(target_sphere.TargetSphereTwoTouch,
                              radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example 17
    def test_termination_and_discount(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker, arena=arena)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        zero_action = np.zeros_like(env.physics.data.ctrl)

        # Walker starts in upright position.
        # Should not trigger failure termination in the first few steps.
        for _ in range(5):
            env.step(zero_action)
            self.assertFalse(task.should_terminate_episode(env.physics))
            np.testing.assert_array_equal(task.get_discount(env.physics), 1)

        # Rotate the walker upside down and run the physics until it makes contact.
        current_time = env.physics.data.time
        walker.shift_pose(env.physics,
                          position=(0, 0, 10),
                          quaternion=(0, 1, 0, 0))
        env.physics.forward()
        while env.physics.data.ncon == 0:
            env.physics.step()
        env.physics.data.time = current_time

        # Should now trigger a failure termination.
        env.step(zero_action)
        self.assertTrue(task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(task.get_discount(env.physics), 0)
Example 18
    def test_reward_fixed_target(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        target_position = task.target_position(env.physics)
        zero_action = np.zeros_like(env.physics.data.ctrl)
        for _ in range(2):
            timestep = env.step(zero_action)
            self.assertEqual(timestep.reward, 0)
        walker_pos = env.physics.bind(walker.root_body).xpos
        walker.set_pose(
            env.physics,
            position=[target_position[0], target_position[1], walker_pos[2]])
        env.physics.forward()

        # Receive reward while the agent remains at that location.
        timestep = env.step(zero_action)
        self.assertEqual(timestep.reward, 1)

        # Target position should not change.
        np.testing.assert_array_equal(target_position,
                                      task.target_position(env.physics))
Example 19
  def test_prop_factory(self):
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(0,),
        min_steps=1,
        disable_props=False,
        prop_factory=props.Prop,
    )
    env = composer.Environment(task=task)

    observation = env.reset().observation
    # Test the expected prop observations exist and have the expected size.
    dims = [3, 4]
    for key, dim in zip(REFERENCE_PROP_KEYS, dims):
      self.assertIn(key, task.observables)
      self.assertSequenceEqual(observation[key].shape, (N_PROPS, dim))

    # Since no ghost offset was specified, test that there are no ghost props.
    self.assertEmpty(task._ghost_props)

    # Test that props go to the expected location on reset.
    for ref_key, obs_key in zip(REFERENCE_PROP_KEYS, PROP_OBSERVATION_KEYS):
      np.testing.assert_array_equal(observation[ref_key], observation[obs_key])

    # Test that prop position contributes to termination error.
    task._set_walker(env.physics)
    wrong_position = observation[REFERENCE_PROP_KEYS[0]] + np.ones(3)
    task._props[0].set_pose(env.physics, wrong_position)
    task.after_step(env.physics, 0)
    self.assertGreater(task._termination_error, 0.)
Example 20
  def test_ghost_prop(self):
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(0,),
        min_steps=1,
        disable_props=False,
        prop_factory=props.Prop,
        ghost_offset=GHOST_OFFSET,
    )
    env = composer.Environment(task=task)

    # Test that the ghost props are present when ghost_offset specified.
    self.assertLen(task._ghost_props, N_PROPS)

    # Test that the ghost prop tracks the goal trajectory after step.
    env.reset()
    observation = env.step(env.action_spec().generate_value()).observation
    ghost_pos, ghost_quat = task._ghost_props[0].get_pose(env.physics)
    goal_pos, goal_quat = (
        np.squeeze(observation[key]) for key in REFERENCE_PROP_KEYS)

    np.testing.assert_array_equal(np.array(ghost_pos), goal_pos + GHOST_OFFSET)
    np.testing.assert_array_equal(ghost_quat, goal_quat)
Example 21
    def test_contact(self):
        walker = rodent.Rat()

        # Build a bowl-shaped arena.
        arena = bowl.Bowl(size=(20., 20.), aesthetic='outdoor_natural')

        # Build a task that rewards the agent for escaping the bowl.
        task = escape.Escape(walker=walker,
                             arena=arena,
                             physics_timestep=_PHYSICS_TIMESTEP,
                             control_timestep=_CONTROL_TIMESTEP)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        zero_action = np.zeros_like(env.physics.data.ctrl)

        # Walker starts in upright position.
        # Should not trigger failure termination in the first few steps.
        for _ in range(5):
            env.step(zero_action)
            self.assertFalse(task.should_terminate_episode(env.physics))
            np.testing.assert_array_equal(task.get_discount(env.physics), 1)
Example 22
  def test_enabled_reference_observables(self):
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(1, 2, 3, 4, 5),
        min_steps=1,
        reward_type='comic',
        enabled_reference_observables=('walker/reference_rel_joints',)
    )

    env = composer.Environment(task=task)

    timestep = env.reset()

    self.assertIn('walker/reference_rel_joints', timestep.observation.keys())
    self.assertNotIn('walker/reference_rel_root_pos_local',
                     timestep.observation.keys())

    # check that all desired observables are enabled.
    desired_observables = []
    desired_observables += task._walker.observables.proprioception
    desired_observables += task._walker.observables.kinematic_sensors
    desired_observables += task._walker.observables.dynamic_sensors

    for observable in desired_observables:
      self.assertTrue(observable.enabled)
Example 23
def build_vision_warehouse(random_state=None):
    """Build canonical 4-pedestal, 2-prop task."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Build the task.
    size_distribution = distributions.Uniform(low=0.75, high=1.25)
    mass_distribution = distributions.Uniform(low=2, high=7)
    prop_resizer = mocap_loader.PropResizer(size_factor=size_distribution,
                                            mass=mass_distribution)
    task = warehouse.PhasedBoxCarry(
        walker=walker,
        num_props=2,
        num_pedestals=4,
        proto_modifier=prop_resizer,
        negative_reward_on_failure_termination=True)

    # return the environment
    return composer.Environment(time_limit=15,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True,
                                max_reset_attempts=float('inf'))
Example 24
def build_vision_toss(random_state=None):
    """Build canonical ball tossing task."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Build the task.
    size_distribution = distributions.Uniform(low=0.95, high=1.5)
    mass_distribution = distributions.Uniform(low=2, high=4)
    prop_resizer = mocap_loader.PropResizer(size_factor=size_distribution,
                                            mass=mass_distribution)
    task = ball_toss.BallToss(walker=walker,
                              proto_modifier=prop_resizer,
                              negative_reward_on_failure_termination=True,
                              priority_friction=True,
                              bucket_offset=3.,
                              y_range=0.5,
                              toss_delay=1.5,
                              randomize_init=True)

    # return the environment
    return composer.Environment(time_limit=6,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True,
                                max_reset_attempts=float('inf'))
Example 25
def load(environment_name,
         env_kwargs=None,
         seed=None,
         time_limit=float('inf'),
         strip_singleton_obs_buffer_dim=False):
    """Loads an environment from board_games.

    Args:
      environment_name: String, the name of the environment to load. Must be in
        `ALL`.
      env_kwargs: extra params to pass to task creation.
      seed: Optional, either an int seed or an `np.random.RandomState` object.
        If None (default), the random number generator will self-seed from a
        platform-dependent source of entropy.
      time_limit: (optional) A float, the time limit in seconds beyond which an
        episode is forced to terminate.
      strip_singleton_obs_buffer_dim: (optional) A boolean, if `True`, the
        array shape of observations with `buffer_size == 1` will not have a
        leading buffer dimension.

    Returns:
      An instance of `composer.Environment`.
    """
    if env_kwargs is not None:
        task = _registry.get_constructor(environment_name)(**env_kwargs)
    else:
        task = _registry.get_constructor(environment_name)()
    return _composer.Environment(
        task=task,
        time_limit=time_limit,
        strip_singleton_obs_buffer_dim=strip_singleton_obs_buffer_dim,
        random_state=seed)
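
# Usage sketch: load a registered board-game task by name. The name
# 'tic_tac_toe' is hypothetical; any valid name must appear in `ALL`.
env = load('tic_tac_toe', seed=0, time_limit=60.)
timestep = env.reset()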
Example 26
  def test_observables(self):
    walker = rodent.Rat()

    arena = floors.Floor(
        size=(10., 10.),
        aesthetic='outdoor_natural')

    task = reach.TwoTouch(
        walker=walker,
        arena=arena,
        target_builders=[
            functools.partial(target_sphere.TargetSphereTwoTouch, radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)
    timestep = env.reset()

    self.assertIn('walker/joints_pos', timestep.observation)
Example 27
def cmu_humanoid_tracking(random_state=None):
    """Requires a CMU humanoid to run down a corridor obstructed by walls."""

    # Use a position-controlled CMU humanoid walker.
    walker_type = cmu_humanoid.CMUHumanoidPositionControlledV2020

    # Build an empty arena.
    arena = arenas.Floor()

    # Build a task that rewards the agent for tracking motion capture reference
    # data.
    task = tracking.MultiClipMocapTracking(
        walker=walker_type,
        arena=arena,
        ref_path=cmu_mocap_data.get_path_for_cmu_2020(),
        dataset='walk_tiny',
        ref_steps=(1, 2, 3, 4, 5),
        min_steps=10,
        reward_type='comic',
    )

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example 28
def _build_humanoid_walls_env():
    """Build humanoid walker walls environment."""
    walker = walkers.CMUHumanoidPositionControlled(
        name='walker',
        observable_options={'egocentric_camera': dict(enabled=True)},
    )
    wall_width = distributions.Uniform(low=1, high=7)
    wall_height = distributions.Uniform(low=2.5, high=4.0)
    swap_wall_side = distributions.Bernoulli(prob=0.5)
    wall_r = distributions.Uniform(low=0.5, high=0.6)
    wall_g = distributions.Uniform(low=0.21, high=0.41)
    wall_rgba = colors.RgbVariation(r=wall_r, g=wall_g, b=0, alpha=1)
    arena = arenas.WallsCorridor(wall_gap=5.0,
                                 wall_width=wall_width,
                                 wall_height=wall_height,
                                 swap_wall_side=swap_wall_side,
                                 wall_rgba=wall_rgba,
                                 corridor_width=10,
                                 corridor_length=100)
    humanoid_task = tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_rotation=1.57,  # pi / 2
        physics_timestep=0.005,
        control_timestep=0.03)
    raw_env = composer.Environment(time_limit=30,
                                   task=humanoid_task,
                                   strip_singleton_obs_buffer_dim=True)

    return raw_env
Example 29
def cmu_humanoid_heterogeneous_forage(random_state=None):
  """Requires a CMU humanoid to find all items of a particular type in a maze."""
  level = ('*******\n'
           '*     *\n'
           '*  P  *\n'
           '*     *\n'
           '*  G  *\n'
           '*     *\n'
           '*******\n')

  # Build a position-controlled CMU humanoid walker.
  walker = cmu_humanoid.CMUHumanoidPositionControlled(
      observable_options={'egocentric_camera': dict(enabled=True)})

  skybox_texture = labmaze_textures.SkyBox(style='sky_03')
  wall_textures = labmaze_textures.WallTextures(style='style_01')
  floor_textures = labmaze_textures.FloorTextures(style='style_01')
  maze = fixed_maze.FixedMazeWithRandomGoals(
      entity_layer=level,
      variations_layer=None,
      num_spawns=1,
      num_objects=6,
  )
  arena = mazes.MazeWithTargets(
      maze=maze,
      xy_scale=3.0,
      z_height=2.0,
      skybox_texture=skybox_texture,
      wall_textures=wall_textures,
      floor_textures=floor_textures,
  )
  task = random_goal_maze.ManyHeterogeneousGoalsMaze(
      walker=walker,
      maze_arena=arena,
      target_builders=[
          functools.partial(
              target_sphere.TargetSphere,
              radius=0.4,
              rgb1=(0, 0.4, 0),
              rgb2=(0, 0.7, 0)),
          functools.partial(
              target_sphere.TargetSphere,
              radius=0.4,
              rgb1=(0.4, 0, 0),
              rgb2=(0.7, 0, 0)),
      ],
      randomize_spawn_rotation=False,
      target_type_rewards=[30., -10.],
      target_type_proportions=[1, 1],
      shuffle_target_builders=True,
      aliveness_reward=0.01,
      control_timestep=.03,
  )

  return composer.Environment(
      time_limit=25,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
Example 30
def _env(players, disable_walker_contacts=True, observables=None):
    return composer.Environment(
        task=soccer.Task(
            players=players,
            arena=soccer.Pitch((20, 15)),
            observables=observables,
            disable_walker_contacts=disable_walker_contacts,
        ),
        time_limit=1)