def test_contact(self):
    """Checks that the escape task does not terminate in the first few steps.

    Builds a rat walker in a bowl arena, resets the environment, then applies
    a zero action for five control steps. After every step the task must not
    request episode termination and the discount must equal 1.
    """
    walker = rodent.Rat()

    # Build a bowl-shaped arena.
    arena = bowl.Bowl(size=(20., 20.), aesthetic='outdoor_natural')

    # Build the escape task around the walker and the bowl arena.
    task = escape.Escape(
        walker=walker,
        arena=arena,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    # Fixed seed so the test is reproducible.
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)
    env.reset()

    # An all-zeros action with the same shape/dtype as the control vector.
    zero_action = np.zeros_like(env.physics.data.ctrl)

    # Walker starts in upright position.
    # Should not trigger failure termination in the first few steps.
    for _ in range(5):
        env.step(zero_action)
        self.assertFalse(task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(task.get_discount(env.physics), 1)
def test_observables(self):
    """Checks that the walker's joint positions are in the first observation.

    Builds a rat walker in a bowl arena, wraps the escape task in an
    environment, and asserts that the observation returned by `reset()`
    contains the 'walker/joints_pos' key.
    """
    walker = rodent.Rat()

    # Build a bowl-shaped arena.
    arena = bowl.Bowl(size=(20., 20.), aesthetic='outdoor_natural')

    # Build the escape task around the walker and the bowl arena.
    task = escape.Escape(
        walker=walker,
        arena=arena,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    # Fixed seed so the test is reproducible.
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)

    # The initial timestep already carries the full observation dict.
    timestep = env.reset()
    self.assertIn('walker/joints_pos', timestep.observation)
def rodent_escape_bowl(random_state=None):
    """Requires a rodent to climb out of a bowl-shaped terrain.

    Args:
        random_state: Passed through unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping the escape task with `time_limit=20`.
    """
    # Position-controlled rodent walker with its egocentric camera observable
    # switched on.
    camera_options = {'egocentric_camera': {'enabled': True}}
    rat = rodent.Rat(observable_options=camera_options)

    # Bowl-shaped arena.
    terrain = bowl.Bowl(size=(20.0, 20.0), aesthetic='outdoor_natural')

    # Task that rewards the agent for being far from the origin.
    escape_task = escape.Escape(
        walker=rat,
        arena=terrain,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    return composer.Environment(
        task=escape_task,
        time_limit=20,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
    )
def ant_escape_bowl(random_state=None):
    """Requires an ant to climb out of a bowl-shaped terrain.

    Args:
        random_state: Passed through unchanged to `composer.Environment`.

    Returns:
        A `composer.Environment` wrapping the escape task with `time_limit=30`.
    """
    walker = ant.Ant()

    # Build a bowl-shaped arena.
    arena = bowl.Bowl(
        ground_size=(15., 15.),
        hfield_size=(30, 30, 5),
        terrain_smoothness=0.15,
        terrain_bump_scale=2.0)

    # Build a task that rewards the agent for being far from the origin.
    # The walker is spawned at an explicit position rather than the default.
    task = escape.Escape(
        walker=walker,
        arena=arena,
        walker_spawn_position=(0, 0, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(
        # NOTE(review): this value previously carried a bare "# 20" comment,
        # presumably an earlier setting (the rodent variant uses 20) --
        # confirm that 30 is the intended limit.
        time_limit=30,
        task=task,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)