def test_termination_and_discount(self):
  """Failure termination fires only after the walker falls and lands."""
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(walker=walker, arena=arena)
  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  env.reset()

  null_action = np.zeros_like(env.physics.data.ctrl)

  # The walker starts upright, so the first few steps must not terminate
  # and the discount must stay at 1.
  for _ in range(5):
    env.step(null_action)
  self.assertFalse(task.should_terminate_episode(env.physics))
  np.testing.assert_array_equal(task.get_discount(env.physics), 1)

  # Flip the walker upside down high above the floor and advance the raw
  # physics until it makes contact with the ground (ncon > 0). The clock is
  # restored afterwards so the settling simulation does not consume episode
  # time.
  saved_time = env.physics.data.time
  walker.shift_pose(
      env.physics, position=(0, 0, 10), quaternion=(0, 1, 0, 0))
  env.physics.forward()
  while env.physics.data.ncon == 0:
    env.physics.step()
  env.physics.data.time = saved_time

  # One environment step in the fallen pose should now trigger a failure
  # termination with zero discount.
  env.step(null_action)
  self.assertTrue(task.should_terminate_episode(env.physics))
  np.testing.assert_array_equal(task.get_discount(env.physics), 0)
def test_reward_fixed_target(self):
  """With a fixed target, reward is 1 at the target and the target stays put."""
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)
  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  env.reset()

  initial_target = task.target_position(env.physics)
  null_action = np.zeros_like(env.physics.data.ctrl)

  # Away from the target the reward should be zero.
  for _ in range(2):
    timestep = env.step(null_action)
    self.assertEqual(timestep.reward, 0)

  # Teleport the walker horizontally onto the target, keeping its height.
  walker_pos = env.physics.bind(walker.root_body).xpos
  walker.set_pose(
      env.physics,
      position=[initial_target[0], initial_target[1], walker_pos[2]])
  env.physics.forward()

  # Receive reward while the agent remains at that location.
  timestep = env.step(null_action)
  self.assertEqual(timestep.reward, 1)

  # A non-moving target must not change after being reached.
  np.testing.assert_array_equal(
      initial_target, task.target_position(env.physics))
def test_observables(self):
  """The task exposes the target position as a walker observable."""
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)
  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)

  first_timestep = env.reset()
  self.assertIn('walker/target', first_timestep.observation)
def test_target_position_randomized_on_reset(self):
  """Each episode reset should sample a fresh target location."""
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)
  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)

  env.reset()
  target_before = task.target_position(env.physics)
  env.reset()
  target_after = task.target_position(env.physics)

  self.assertFalse(
      np.all(target_before == target_after),
      'Target positions are unexpectedly identical.')
def jumping_ball_go_to_target(random_state=None):
  """Requires a jumping ball with head to go to a target.

  Args:
    random_state: Optional `numpy.random.RandomState` seeding the environment.

  Returns:
    A `composer.Environment` with a 30-second time limit.
  """
  # Build a jumping-ball walker on a standard floor arena.
  walker = jumping_ball.JumpingBallWithHead()
  arena = floors.Floor()

  # Dense (non-sparse) reward for approaching the target.
  task = go_to_target.GoToTarget(
      walker=walker,
      arena=arena,
      sparse_reward=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def cmu_humanoid_go_to_target(random_state=None):
  """Requires a CMU humanoid to go to a target.

  Args:
    random_state: Optional `numpy.random.RandomState` seeding the environment.

  Returns:
    A `composer.Environment` with a 30-second time limit.
  """
  # Position-controlled CMU humanoid on a standard floor arena.
  walker = cmu_humanoid.CMUHumanoidPositionControlled()
  arena = floors.Floor()

  # Reward the agent for reaching the target.
  task = go_to_target.GoToTarget(
      walker=walker,
      arena=arena,
      physics_timestep=0.005,
      control_timestep=0.03)

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def _build_task(**task_kwargs):
  """Builds a GoToTarget task with a CMU humanoid on a plain floor.

  Args:
    **task_kwargs: Extra keyword arguments forwarded to
      `go_to_target.GoToTarget`.

  Returns:
    The constructed `go_to_target.GoToTarget` task.
  """
  return go_to_target.GoToTarget(
      walker=cmu_humanoid.CMUHumanoid(),
      arena=floors.Floor(),
      **task_kwargs)