コード例 #1
0
def rodent_run_gaps(random_state=None):
    """Builds an environment where a rat runs along a corridor with gaps.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    # Rat walker with its egocentric camera observable switched on.
    camera_options = {'egocentric_camera': {'enabled': True}}
    walker = rodent.Rat(observable_options=camera_options)

    # Platform and gap lengths are each drawn from a uniform distribution.
    platform_length = distributions.Uniform(.4, .8)
    gap_length = distributions.Uniform(.05, .2)
    arena = corr_arenas.GapsCorridor(
        platform_length=platform_length,
        gap_length=gap_length,
        corridor_width=2,
        corridor_length=40,
        aesthetic='outdoor_natural',
    )

    # Task that rewards running down the corridor at the target velocity.
    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        target_velocity=1.0,
        contact_termination=False,
        terminate_at_height=-0.3,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    environment = composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
    return environment
コード例 #2
0
  def test_termination_and_discount(self):
    """Checks termination and discount before and after the walker falls.

    Steps a `RunThroughCorridor` task on an `EmptyCorridor` with all-zero
    actions and asserts that the episode neither terminates nor changes its
    discount from 1. The walker is then flipped and dropped until a contact
    is registered, after which one more environment step must report
    termination with a discount of 0.
    """
    walker = cmu_humanoid.CMUHumanoid()
    arena = corridor_arenas.EmptyCorridor()
    task = corridor_tasks.RunThroughCorridor(walker, arena)

    # Fixed seed so the environment is built from a known random state.
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)
    env.reset()

    # Action of the same shape as the control vector, filled with zeros.
    zero_action = np.zeros_like(env.physics.data.ctrl)

    # Walker starts in upright position.
    # Should not trigger failure termination in the first few steps.
    for _ in range(5):
      env.step(zero_action)
      self.assertFalse(task.should_terminate_episode(env.physics))
      self.assertEqual(task.get_discount(env.physics), 1)

    # Rotate the walker upside down and run the physics until it makes contact.
    # The simulation time is saved first and restored afterwards, so the raw
    # `physics.step()` calls below leave `data.time` unchanged.
    # NOTE(review): presumably this keeps the environment's own time tracking
    # consistent -- confirm.
    current_time = env.physics.data.time
    walker.shift_pose(env.physics, position=(0, 0, 10), quaternion=(0, 1, 0, 0))
    env.physics.forward()
    while env.physics.data.ncon == 0:
      env.physics.step()
    env.physics.data.time = current_time

    # Should now trigger a failure termination.
    env.step(zero_action)
    self.assertTrue(task.should_terminate_episode(env.physics))
    self.assertEqual(task.get_discount(env.physics), 0)
コード例 #3
0
  def test_walker_is_correctly_reinitialized(
      self, position_offset, rotate_180_degrees, use_variations):
    """Checks that `initialize_episode` resets the walker's pose and joints.

    The walker is first put into a randomized pose; after
    `task.initialize_episode` its joint positions, root position and root
    orientation are compared against the walker's upright pose combined with
    the requested spawn position and rotation.

    Args:
      position_offset: Value used as `walker_spawn_position`; also added to
        the upright position to form the expected root position.
      rotate_180_degrees: If true, `walker_spawn_rotation` is `np.pi`;
        otherwise it is `None`.
      use_variations: If true, both spawn arguments are wrapped in
        `deterministic.Constant` before being passed to the task.
    """
    walker_spawn_position = position_offset

    if not rotate_180_degrees:
      walker_spawn_rotation = None
    else:
      walker_spawn_rotation = np.pi

    # Optionally pass the same values wrapped as constant variations.
    if use_variations:
      walker_spawn_position = deterministic.Constant(position_offset)
      walker_spawn_rotation = deterministic.Constant(walker_spawn_rotation)

    walker = cmu_humanoid.CMUHumanoid()
    arena = corridor_arenas.EmptyCorridor()
    task = corridor_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=walker_spawn_position,
        walker_spawn_rotation=walker_spawn_rotation)

    # Randomize the initial pose and joint positions in order to check that they
    # are set correctly by `initialize_episode`.
    random_state = np.random.RandomState(12345)
    task.initialize_episode_mjcf(random_state)
    physics = mjcf.Physics.from_mjcf_model(task.root_entity.mjcf_model)

    walker_joints = walker.mjcf_model.find_all('joint')
    physics.bind(walker_joints).qpos = random_state.uniform(
        size=len(walker_joints))
    walker.set_pose(physics,
                    position=random_state.uniform(size=3),
                    quaternion=rotations.UniformQuaternion()(random_state))

    # Re-initialize the episode, then call `forward()` before reading the pose.
    task.initialize_episode(physics, random_state)
    physics.forward()

    with self.subTest('Correct joint positions'):
      walker_qpos = physics.bind(walker_joints).qpos
      # Compare against the upright pose's joint positions when it defines
      # them, and against the bound joints' `qpos0` otherwise.
      if walker.upright_pose.qpos is not None:
        np.testing.assert_array_equal(walker_qpos, walker.upright_pose.qpos)
      else:
        walker_qpos0 = physics.bind(walker_joints).qpos0
        np.testing.assert_array_equal(walker_qpos, walker_qpos0)

    walker_xpos, walker_xquat = walker.get_pose(physics)

    with self.subTest('Correct position'):
      expected_xpos = walker.upright_pose.xpos + np.array(position_offset)
      np.testing.assert_array_equal(walker_xpos, expected_xpos)

    with self.subTest('Correct orientation'):
      # Normalize a copy so the walker's stored upright quaternion is not
      # modified in place.
      upright_xquat = walker.upright_pose.xquat.copy()
      upright_xquat /= np.linalg.norm(walker.upright_pose.xquat)
      if rotate_180_degrees:
        # Assuming (w, x, y, z) component order, this is the Hamilton product
        # (0, 0, 0, 1) * upright_xquat, i.e. the upright orientation followed
        # by a half-turn about the z-axis.
        expected_xquat = (-upright_xquat[3], -upright_xquat[2],
                          upright_xquat[1], upright_xquat[0])
      else:
        expected_xquat = upright_xquat
      np.testing.assert_allclose(walker_xquat, expected_xquat)
コード例 #4
0
def cmu_humanoid_run_gaps(random_state=None):
    """Builds an environment where a CMU humanoid runs a corridor with gaps.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    # Position-controlled CMU humanoid with the egocentric camera enabled.
    camera_options = {'egocentric_camera': {'enabled': True}}
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options=camera_options)

    # Platform and gap lengths are each drawn from a uniform distribution.
    platform_length = distributions.Uniform(.3, 2.5)
    gap_length = distributions.Uniform(.5, 1.25)
    arena = corr_arenas.GapsCorridor(platform_length=platform_length,
                                     gap_length=gap_length,
                                     corridor_width=10,
                                     corridor_length=100)

    # Task that rewards running down the corridor at the target velocity.
    task_kwargs = dict(
        walker=walker,
        arena=arena,
        walker_spawn_position=(0.5, 0, 0),
        target_velocity=3.0,
        physics_timestep=0.005,
        control_timestep=0.03,
    )
    task = corr_tasks.RunThroughCorridor(**task_kwargs)

    return composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
コード例 #5
0
def walker_run_gaps(random_state=None):
    """Builds an environment where a planar walker runs a corridor with gaps.

    The task's `get_reward` is replaced on the instance by
    `_walker_get_reward`, bound to the task.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping the modified `RunThroughCorridor`
        task.
    """
    walker = planar_walker.PlanarWalker()

    # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
    # platforms are uniformly randomized.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(1.25, 2.5),  # (0.3, 2.5)
        gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
        corridor_width=10,
        corridor_length=250)

    # Build a task that rewards the agent for running down the corridor at a
    # specific velocity.
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(1.0, 0, 0),
                                         stand_height=1.2,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    # (Chongyi Zheng): redefine reward function.
    # `__get__(instance, owner)` expects the owner *class* as its second
    # argument, so pass `type(task)` rather than the old bound method.
    task.get_reward = _walker_get_reward.__get__(task, type(task))

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
コード例 #6
0
def cmu_humanoid_run_walls(random_state=None):
    """Builds an environment where a CMU humanoid runs a walled corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    # Position-controlled CMU humanoid with the egocentric camera enabled.
    walker = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': {'enabled': True}})

    # Corridor obstructed by walls whose width is drawn uniformly.
    wall_width = distributions.Uniform(1, 7)
    arena = corr_arenas.WallsCorridor(
        wall_gap=4.,
        wall_width=wall_width,
        wall_height=3.0,
        corridor_width=10,
        corridor_length=100,
        include_initial_padding=False,
    )

    # Task that rewards running down the corridor at the target velocity.
    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(0.5, 0, 0),
        target_velocity=3.0,
        physics_timestep=0.005,
        control_timestep=0.03,
    )

    environment = composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
    return environment
コード例 #7
0
def ant_run_walls(random_state=None):
    """Builds an environment where an ant runs down a walled corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`. Defaults
            to `None`, so existing zero-argument calls keep working.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    walker = ant.Ant()

    # Corridor obstructed by walls whose width is drawn uniformly.
    arena = corr_arenas.WallsCorridor(wall_gap=4.,
                                      wall_width=distributions.Uniform(1, 7),
                                      wall_height=3.0,
                                      corridor_width=10,
                                      corridor_length=250,
                                      include_initial_padding=False)

    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(0.5, 0, 0),
                                         walker_spawn_rotation=0,
                                         stand_height=0.2,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    # (Chongyi Zheng): redefine reward function (currently disabled).
    # task.get_reward = _ant_get_reward.__get__(task, task.get_reward)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
コード例 #8
0
def jumping_ball_run_gaps(random_state=None):
    """Builds an environment where a jumping ball runs a corridor with gaps.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    walker = jumping_ball.JumpingBallWithHead()

    # Platform and gap lengths are each drawn from a uniform distribution.
    platform_length = distributions.Uniform(1.0, 2.5)  # (0.3, 2.5)
    gap_length = distributions.Uniform(0.3, 0.7)  # (0.5, 1.25)
    arena = corr_arenas.GapsCorridor(platform_length=platform_length,
                                     gap_length=gap_length,
                                     corridor_width=10,
                                     corridor_length=250)

    # Task that rewards running down the corridor at the target velocity.
    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(1.0, 0, 0),
        target_velocity=3.0,
        contact_termination=False,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    environment = composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
    return environment
コード例 #9
0
def ant_run(random_state=None):
    """Builds an environment where an ant runs down an empty corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    task = corr_tasks.RunThroughCorridor(
        walker=ant.Ant(),
        arena=corr_arenas.EmptyCorridor(),
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    environment = composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
    return environment
コード例 #10
0
def _get_jumping_ball_corridor_physics():
    """Builds a jumping-ball walker and an empty-corridor environment for it.

    Returns:
        A `(walker, env)` tuple: the `JumpingBallWithHead` walker and the
        `composer.Environment` whose task uses it.
    """
    walker = jumping_ball.JumpingBallWithHead()
    arena = corr_arenas.EmptyCorridor()

    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )
    env = composer.Environment(
        time_limit=30,
        task=task,
        strip_singleton_obs_buffer_dim=True,
    )

    return walker, env
コード例 #11
0
def jumping_ball_run_long(random_state=None):
    """Builds an environment where a jumping ball runs a long empty corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`. Defaults
            to `None`, so existing zero-argument calls keep working.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    walker = jumping_ball.JumpingBallWithHead()

    arena = corr_arenas.EmptyCorridor(corridor_length=250,
                                      visible_side_planes=False)
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(1, 0, 0),
                                         walker_spawn_rotation=0,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
コード例 #12
0
def walker_run_long(random_state=None):
    """Builds an environment where a planar walker runs a long empty corridor.

    The task's `get_reward` is replaced on the instance by
    `_walker_get_reward`, bound to the task.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`. Defaults
            to `None`, so existing zero-argument calls keep working.

    Returns:
        A `composer.Environment` wrapping the modified `RunThroughCorridor`
        task.
    """
    walker = planar_walker.PlanarWalker()
    arena = corr_arenas.EmptyCorridor(corridor_length=250,
                                      visible_side_planes=False)
    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(1, 0, 0),
                                         walker_spawn_rotation=0,
                                         stand_height=1.2,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    # (Chongyi Zheng): redefine reward function.
    # `__get__(instance, owner)` expects the owner *class* as its second
    # argument, so pass `type(task)` rather than the old bound method.
    task.get_reward = _walker_get_reward.__get__(task, type(task))

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
コード例 #13
0
def jumping_ball_run_walls(random_state=None):
    """Builds an environment where a jumping ball runs a walled corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`. Defaults
            to `None`, so existing zero-argument calls keep working.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    walker = jumping_ball.JumpingBallWithHead()

    # Corridor obstructed by walls whose width is drawn uniformly.
    arena = corr_arenas.WallsCorridor(wall_gap=4.,
                                      wall_width=distributions.Uniform(1, 7),
                                      wall_height=3.0,
                                      corridor_width=10,
                                      corridor_length=250,
                                      include_initial_padding=False)

    task = corr_tasks.RunThroughCorridor(walker=walker,
                                         arena=arena,
                                         walker_spawn_position=(0.5, 0, 0),
                                         walker_spawn_rotation=0,
                                         contact_termination=False,
                                         physics_timestep=_PHYSICS_TIMESTEP,
                                         control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
コード例 #14
0
def ant_run_walls(random_state=None):
    """Builds an environment where an ant runs down a walled corridor.

    Args:
        random_state: Forwarded unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping a `RunThroughCorridor` task.
    """
    walker = ant.Ant()

    # Corridor obstructed by walls whose width is drawn uniformly.
    wall_width = distributions.Uniform(1, 7)
    arena = corr_arenas.WallsCorridor(
        wall_gap=4.,
        wall_width=wall_width,
        wall_height=3.0,
        corridor_width=10,
        corridor_length=100,
        include_initial_padding=False,
    )

    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(0.5, 0, 0),
        walker_spawn_rotation=0,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    environment = composer.Environment(
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
    return environment