def pybullet_humanoid():
    """Configuration for training PPO on the HumanoidBulletEnv-v0 task.

    Returns:
      A dict of hyperparameters: everything from default(), overlaid with the
      humanoid-specific settings defined below (collected via locals()).
    """
    # CPython-specific idiom: mutating the dict returned by locals() inside a
    # function merges the default config into this frame's locals snapshot,
    # which is then returned wholesale by `return locals()` below.
    locals().update(default())
    # NOTE(review): this randomizer is never wired into the environment here —
    # `env` is a plain gym id string, not a constructor taking env_randomizer.
    # It only ends up as an extra key in the returned config dict. Looks like
    # copy-paste from the minitaur configs; confirm before removing, since
    # deleting it changes the returned dict's keys.
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    env = 'HumanoidBulletEnv-v0'
    max_length = 1000  # maximum episode length in simulator steps
    steps = 3e8  # 300M
    return locals()
def pybullet_duck_minitaur():
    """Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class.

    Returns:
      A dict of hyperparameters: everything from default(), overlaid with the
      duck-minitaur-specific settings defined below (collected via locals()).
    """
    # CPython-specific idiom: mutating the dict returned by locals() merges the
    # default config into this frame's locals snapshot, which is then returned
    # wholesale by `return locals()` below.
    locals().update(default())
    # Randomizer that perturbs the environment on reset; passed to the env
    # constructor below and also surfaced as a key of the returned config.
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    # Deferred constructor: the trainer calls env() to instantiate the
    # environment with these fixed keyword arguments.
    env = functools.partial(
        minitaur_gym_env.MinitaurBulletDuckEnv,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        pd_control_enabled=True,
        env_randomizer=randomizer,
        render=False)
    max_length = 1000  # maximum episode length in simulator steps
    steps = 3e7  # 30M
    return locals()
def ResetPoseExample():
    """An example that the minitaur stands still using the reset pose."""
    num_steps = 1000
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=env_randomizer,
        hard_reset=False)
    # Command every one of the eight motors to pi/2 so the robot simply
    # holds its reset stance for the whole episode.
    hold_pose = [math.pi / 2] * 8
    for _ in range(num_steps):
        _, _, done, _ = environment.step(hold_pose)
        if done:
            break
    environment.reset()
def SinePolicyExample():
    """An example of minitaur walking with a sine gait."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=env_randomizer,
        on_rack=False)
    total_reward = 0
    num_steps = 20000
    amp_front = 0.1   # swing amplitude for the front leg pair
    amp_back = 0.1    # swing amplitude for the back leg pair
    gait_speed = 1
    dt = 0.01         # seconds of simulated time per control step
    for step_index in range(num_steps):
        t = step_index * dt
        # Steer right for the first 10s, left for the next 10s, then straight.
        if t < 10:
            steering = 0.1
        elif t < 20:
            steering = -0.1
        else:
            steering = 0
        phase = t * gait_speed
        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(phase) * (amp_front + steering)
        a2 = math.sin(phase + math.pi) * (amp_front - steering)
        a3 = math.sin(phase) * amp_back
        a4 = math.sin(phase + math.pi) * amp_back
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        total_reward += reward
        if done:
            break
    environment.reset()