Example #1
def fitness_minitaur(chromosome, steps):
    """Return the fitness of a chromosome."""
    # speed = chromosome[0]
    # time_step = chromosome[1]
    # amplitude1 = chromosome[2]
    # steering_amplitude = chromosome[3]
    # amplitude2 = chromosome[4]

    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)

    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * chromosome[1]
        a1 = math.sin(t * chromosome[0]) * (chromosome[2] + chromosome[3])
        a2 = math.sin(t * chromosome[0] + math.pi) * (chromosome[2] -
                                                      chromosome[3])
        a3 = math.sin(t * chromosome[0]) * chromosome[4]
        a4 = math.sin(t * chromosome[0] + math.pi) * chromosome[4]
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            sum_reward -= 5  # penalty for falling
            break
    environment.reset()
    fitness = sum_reward + chromosome[0]  # objectives: total reward plus the speed gene
    return fitness
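
The index map in the comments above defines a five-gene chromosome: speed, time_step, amplitude1, steering_amplitude, amplitude2. As a minimal sketch of how the fitness function might be driven (the gene bounds, population size, and random-search loop are assumptions, not part of the original code):

# Hypothetical per-gene bounds: [speed, time_step, amplitude1, steering_amplitude, amplitude2]
GENE_LOW = np.array([50.0, 0.1, 0.0, -0.2, 0.0])
GENE_HIGH = np.array([120.0, 1.0, 1.0, 0.2, 1.0])


def evaluate_random_population(population_size=20, steps=500, seed=0):
    """Sample random chromosomes inside the assumed bounds and return the best one."""
    rng = np.random.RandomState(seed)
    population = rng.uniform(GENE_LOW, GENE_HIGH,
                             size=(population_size, len(GENE_LOW)))
    scores = [fitness_minitaur(chromosome, steps) for chromosome in population]
    best_index = int(np.argmax(scores))
    return population[best_index], scores[best_index]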
Example #2
def pybullet_humanoid():
  locals().update(default())
  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
  env = 'HumanoidBulletEnv-v0'
  max_length = 1000
  steps = 3e8  # 300M
  return locals()
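
The locals().update(default()) line follows the config-as-function pattern used here: every configuration function returns its local variables as a dict, and default() holds the values shared by all configurations. A rough sketch of that pattern, with placeholder hyperparameter names that are not taken from the original config:

def default():
  """Shared defaults; specific configs override a subset of these (placeholder values)."""
  env = None
  max_length = 1000
  steps = 1e7
  return locals()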
def ResetPoseExample():
    """
    An example that the minitaur stands still using the reset pose.
    """
    # How many steps
    steps = 1000

    # Environment randomizer
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())

    # Minitaur gym environment
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=randomizer,
        hard_reset=False)

    # Angle to apply at each step
    action = [math.pi / 2] * 8

    # Run simulation steps
    for _ in range(steps):
        _, _, done, _ = environment.step(action)
        if done:
            break
        # end if
    # end for

    # Reset the environment
    environment.reset()
def SinePolicyExample():
    """An example of minitaur walking with a sine gait."""
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 500  #20000
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    speed = 95.76487704867183  #99.95830805709147 #96.03904323742279

    for step_counter in range(steps):
        time_step = 0.5180280589663037  #0.3738710084245188 #0.7118871187955457
        t = step_counter * time_step
        amplitude1 = 0.7958106754045535  #0.6704708050626745 #0.8313325863588368 #amplitude_1_bound
        amplitude2 = 0.5783009482997857  #0.24525131201122005 #0.2804425976578887 #amplitude_2_bound

        steering_amplitude = 0
        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        print(sum_reward)
        if done:
            break
    environment.reset()
Example #5
def DDPG_Example():
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                             motor_velocity_limit=np.inf,
                                             pd_control_enabled=True,
                                             hard_reset=False,
                                             env_randomizer=randomizer,
                                             shake_weight=0.0,
                                             drift_weight=0.0,
                                             energy_weight=0.0,
                                             on_rack=False)

    state_dim = 28
    action_dim = 4
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
    with tf.Session() as sess:
        actor = ActorNetwork(sess,
                             state_dim,
                             action_dim,
                             learning_rate=float(LR_A),
                             tau=TAU,
                             batch_size=BATCH_SIZE)

        critic = CriticNetwork(sess,
                               state_dim,
                               action_dim,
                               learning_rate=float(LR_C),
                               tau=TAU,
                               gamma=float(GAMMA),
                               num_actor_vars=actor.get_num_trainable_vars())

        train(sess, env, actor, critic, actor_noise, load_weights=False)
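
DDPG_Example depends on an OrnsteinUhlenbeckActionNoise process for exploration, but its definition is not shown in this snippet. A common implementation of that noise process looks roughly like the sketch below; the default sigma, theta, and dt values are assumptions:

class OrnsteinUhlenbeckActionNoise:
    """Temporally correlated noise added to the actor output for exploration."""

    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2, x0=None):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.x0 = x0
        self.reset()

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev +
             self.theta * (self.mu - self.x_prev) * self.dt +
             self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x

    def reset(self):
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)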
Example #6
def pybullet_minitaur():
    """Configuration specific to minitaur_gym_env.MinitaurBulletEnv class."""
    locals().update(default())
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    env = functools.partial(minitaur_gym_env.MinitaurBulletEnv,
                            accurate_motor_model_enabled=True,
                            motor_overheat_protection=True,
                            pd_control_enabled=True,
                            env_randomizer=randomizer,
                            render=False)
    max_length = 1000
    steps = 3e7  # 30M
    return locals()
def ResetPoseExample():
    """An example that the minitaur stands still using the reset pose."""
    steps = 1000
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=randomizer,
        hard_reset=False)
    action = [math.pi / 2] * 8
    for _ in range(steps):
        _, _, done, _ = environment.step(action)
        if done:
            break
    environment.reset()
def SinePolicyExample():
    """An example of minitaur walking with a sine gait."""
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 20000
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    speed = 1

    for step_counter in range(steps):
        time_step = 0.01
        t = step_counter * time_step

        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        steering_amplitude = 0
        if t < 10:
            steering_amplitude = 0.1
        elif t < 20:
            steering_amplitude = -0.1
        else:
            steering_amplitude = 0

        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            break
    environment.reset()
import sys
sys.path.insert(0, '../..')

import libs_agents
import numpy
import time

import gym
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer

randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                         leg_model_enabled=False,
                                         motor_velocity_limit=numpy.inf,
                                         pd_control_enabled=True,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         env_randomizer=randomizer,
                                         hard_reset=False)


class Wrapper(gym.Wrapper):
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)

    def step(self, action):
        action_ = numpy.pi * (0.1 * action + 0.5)
        return self.env.step(action_)
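
The wrapper rescales a normalized agent action into a narrow band around the reset pose: with each component of action in roughly [-1, 1], numpy.pi * (0.1 * action + 0.5) yields motor angles between 0.4*pi and 0.6*pi. An illustrative usage sketch (the random-action loop is an assumption, not part of the original script):

wrapped_env = Wrapper(env)

wrapped_env.reset()
for _ in range(100):
    # Normalized action in [-1, 1] for each of the 8 motors.
    action = numpy.random.uniform(-1.0, 1.0, size=8)
    observation, reward, done, info = wrapped_env.step(action)
    if done:
        break
wrapped_env.reset()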

Example #10
  def __init__(self,
               urdf_root=pybullet_data.getDataPath(),
               action_repeat=1,
               distance_weight=1.0,
               energy_weight=0.005,
               shake_weight=0.0,
               drift_weight=0.0,
               distance_limit=float("inf"),
               observation_noise_stdev=0.0,
               self_collision_enabled=True,
               motor_velocity_limit=np.inf,
               pd_control_enabled=False,  # not needed if the accurate motor model is enabled (it has its own PD controller)
               leg_model_enabled=True,
               accurate_motor_model_enabled=True,
               motor_kp=1.0,
               motor_kd=0.02,
               torque_control_enabled=False,
               motor_overheat_protection=True,
               hard_reset=True,
               on_rack=False,
               render=False,
               kd_for_pd_controllers=0.3,
               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
    """Initialize the minitaur gym environment.
    Args:
      urdf_root: The path to the urdf data folder.
      action_repeat: The number of simulation steps before actions are applied.
      distance_weight: The weight of the distance term in the reward.
      energy_weight: The weight of the energy term in the reward.
      shake_weight: The weight of the vertical shakiness term in the reward.
      drift_weight: The weight of the sideways drift term in the reward.
      distance_limit: The maximum distance to terminate the episode.
      observation_noise_stdev: The standard deviation of observation noise.
      self_collision_enabled: Whether to enable self collision in the sim.
      motor_velocity_limit: The velocity limit of each motor.
      pd_control_enabled: Whether to use PD controller for each motor.
      leg_model_enabled: Whether to use a leg motor to reparameterize the action
        space.
      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
      motor_kp: proportional gain for the accurate motor model.
      motor_kd: derivative gain for the accurate motor model.
      torque_control_enabled: Whether to use torque control. If set to False,
        pose control will be used.
      motor_overheat_protection: Whether to shut down a motor that has exerted
        a large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of
        time (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for
        more details.
      hard_reset: Whether to wipe the simulation and load everything when reset
        is called. If set to False, reset just places the minitaur back at the
        start position and restores its initial pose.
      on_rack: Whether to place the minitaur on a rack. This is only used to
        debug the walking gait. In this mode, the minitaur's base is hung in
        midair so that its walking gait is clearer to visualize.
      render: Whether to render the simulation.
      kd_for_pd_controllers: kd value for the PD controllers of the motors.
      env_randomizer: An EnvRandomizer to randomize the physical properties
        during reset().
    """
    self._time_step = 0.01
    self._action_repeat = action_repeat
    self._num_bullet_solver_iterations = 300
    self._urdf_root = urdf_root
    self._self_collision_enabled = self_collision_enabled
    self._motor_velocity_limit = motor_velocity_limit
    self._observation = []
    self._env_step_counter = 0
    self._is_render = render
    self._last_base_position = [0, 0, 0]
    self._distance_weight = distance_weight
    self._energy_weight = energy_weight
    self._drift_weight = drift_weight
    self._shake_weight = shake_weight
    self._distance_limit = distance_limit
    self._observation_noise_stdev = observation_noise_stdev
    self._action_bound = 1
    self._pd_control_enabled = pd_control_enabled
    self._leg_model_enabled = leg_model_enabled
    self._accurate_motor_model_enabled = accurate_motor_model_enabled
    self._motor_kp = motor_kp
    self._motor_kd = motor_kd
    self._torque_control_enabled = torque_control_enabled
    self._motor_overheat_protection = motor_overheat_protection
    self._on_rack = on_rack
    self._cam_dist = 1.0
    self._cam_yaw = 0
    self._duckId = -1
    self._cam_pitch = -30
    self._hard_reset = True
    self._kd_for_pd_controllers = kd_for_pd_controllers
    self._last_frame_time = 0.0
    print("urdf_root=" + self._urdf_root)
    self._env_randomizer = env_randomizer
    # PD control needs smaller time step for stability.
    if pd_control_enabled or accurate_motor_model_enabled:
      self._time_step /= NUM_SUBSTEPS
      self._num_bullet_solver_iterations /= NUM_SUBSTEPS
      self._action_repeat *= NUM_SUBSTEPS

    if self._is_render:
      self._pybullet_client = bullet_client.BulletClient(
          connection_mode=pybullet.GUI)
    else:
      self._pybullet_client = bullet_client.BulletClient()

    self._seed()
    self.reset()
    observation_high = (
        self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
    observation_low = (
        self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
    action_dim = 8
    action_high = np.array([self._action_bound] * action_dim)
    self.action_space = spaces.Box(-action_high, action_high)
    self.observation_space = spaces.Box(observation_low, observation_high)
    self.viewer = None
    self._hard_reset = hard_reset  # This assignment needs to happen after reset().
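
The branch that divides _time_step and multiplies _action_repeat by NUM_SUBSTEPS keeps the amount of simulated time per env.step() unchanged; only the internal solver step becomes finer. A small numeric check, assuming NUM_SUBSTEPS = 5 (the real constant is defined at module level in minitaur_gym_env.py):

NUM_SUBSTEPS = 5                 # assumed value for illustration
time_step = 0.01
action_repeat = 1

time_step /= NUM_SUBSTEPS        # 0.002 s per solver substep
action_repeat *= NUM_SUBSTEPS    # 5 substeps applied per env.step()

assert abs(time_step * action_repeat - 0.01) < 1e-12  # still 0.01 s of simulated time per step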
def SinePolicyExample():
    """
    An example of minitaur walking with a sine gait
    """
    # Environment randomizer
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())

    # Load the minitaur environment
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)

    # Reward summation
    sum_reward = 0

    # How many steps
    steps = 20000

    # Amplitudes
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1

    # Speed parameter
    speed = 10

    # Run the simulation steps
    for step_counter in range(steps):
        # Time step
        time_step = 0.01

        # Current simulation time
        t = step_counter * time_step

        # Amplitudes
        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        steering_amplitude = 0

        # Steer one way for the first 10 seconds
        if t < 10:
            steering_amplitude = 0.1
        # Steer the other way for the next 10 seconds
        elif t < 20:
            steering_amplitude = -0.1
        # Then go straight
        else:
            steering_amplitude = 0
        # end if

        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2

        # List of actions
        action = [a1, a2, a2, a1, a3, a4, a4, a3]

        # Apply action and get reward
        _, reward, done, _ = environment.step(action)

        # Add to total reward
        sum_reward += reward

        # If done, end
        if done:
            break
        # end if
    # end for

    # Reset the environment
    environment.reset()
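
These example functions are self-contained; a minimal entry point for running one of them could look like this (which example to call is an arbitrary choice):

if __name__ == "__main__":
    SinePolicyExample()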