Example #1
def fitness_minitaur(chromosome, steps):
    """Retorna el fitness de un cromosoma """
    # speed = chromosome[0]
    # time_step = chromosome[1]
    # amplitude1 = chromosome[2]
    # steering_amplitude = chromosome[3]
    # amplitude2 = chromosome[4]

    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)

    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * chromosome[1]
        a1 = math.sin(t * chromosome[0]) * (chromosome[2] + chromosome[3])
        a2 = math.sin(t * chromosome[0] + math.pi) * (chromosome[2] -
                                                      chromosome[3])
        a3 = math.sin(t * chromosome[0]) * chromosome[4]
        a4 = math.sin(t * chromosome[0] + math.pi) * chromosome[4]
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            sum_reward += 5  # penalty for falling
            break
    environment.reset()
    fitness = sum_reward + chromosome[0]  # objective: total reward plus speed
    return fitness
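A minimal usage sketch for fitness_minitaur, assuming a 5-gene chromosome ordered as the comments above describe (speed, time_step, amplitude1, steering_amplitude, amplitude2); the gene bounds and the random-search loop are illustrative, not part of the original example:

import random

# Hypothetical per-gene bounds, chosen for illustration only.
BOUNDS = [(50.0, 100.0), (0.1, 1.0), (0.0, 1.0), (0.0, 0.5), (0.0, 1.0)]

def random_chromosome():
    return [random.uniform(lo, hi) for lo, hi in BOUNDS]

# Evaluate a small random population for 500 simulation steps each.
population = [random_chromosome() for _ in range(10)]
scores = [fitness_minitaur(c, steps=500) for c in population]
best_score, best_chromosome = max(zip(scores, population))
print("best fitness:", best_score, "chromosome:", best_chromosome)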
Example #2
def DDPG_Example():
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                             motor_velocity_limit=np.inf,
                                             pd_control_enabled=True,
                                             hard_reset=False,
                                             env_randomizer=randomizer,
                                             shake_weight=0.0,
                                             drift_weight=0.0,
                                             energy_weight=0.0,
                                             on_rack=False)

    state_dim = 28
    action_dim = 4
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
    with tf.Session() as sess:
        actor = ActorNetwork(sess,
                             state_dim,
                             action_dim,
                             learning_rate=float(LR_A),
                             tau=TAU,
                             batch_size=BATCH_SIZE)

        critic = CriticNetwork(sess,
                               state_dim,
                               action_dim,
                               learning_rate=float(LR_C),
                               tau=TAU,
                               gamma=float(GAMMA),
                               num_actor_vars=actor.get_num_trainable_vars())

        train(sess, env, actor, critic, actor_noise, load_weights=False)
Example #3
def MotorOverheatExample():
    """An example in which the minitaur motor overheat protection is triggered.

  The minitaur is leaning forward and the motors exceed their threshold
  torques. The overheat protection will be triggered in ~1 sec.
  """

    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        motor_overheat_protection=True,
        accurate_motor_model_enabled=True,
        motor_kp=1.20,
        motor_kd=0.00,
        on_rack=False)

    action = [.0] * 8
    # Alternate the sign of a 0.1 rad offset per motor and per side,
    # which leans the robot forward.
    for i in range(8):
        action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)

    steps = 500
    actions_and_observations = []
    for step_counter in range(steps):
        # Matches the internal timestep.
        time_step = 0.01
        t = step_counter * time_step
        current_row = [t]
        current_row.extend(action)

        observation, _, _, _ = environment.step(action)
        current_row.extend(observation.tolist())
        actions_and_observations.append(current_row)
    environment.reset()
Example #4
def make_env(env_name, seed=-1, render_mode=False):
  if (env_name.startswith("RacecarBulletEnv")):
    print("bullet_racecar_started")
    env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
  elif (env_name.startswith("MinitaurBulletEnv")):
    print("bullet_minitaur_started")
    env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
  elif (env_name.startswith("MinitaurDuckBulletEnv")):
    print("bullet_minitaur_duck_started")
    env = MinitaurDuckBulletEnv(render=render_mode)
  elif (env_name.startswith("MinitaurBallBulletEnv")):
    print("bullet_minitaur_ball_started")
    env = MinitaurBallBulletEnv(render=render_mode)
  elif (env_name.startswith("KukaBulletEnv")):
    print("bullet_kuka_grasping started")
    env = kukaGymEnv.KukaGymEnv(renders=render_mode,isDiscrete=False)
  else:
    if env_name.startswith("Roboschool"):
      import roboschool
    env = gym.make(env_name)
    if render_mode and not env_name.startswith("Roboschool"):
      env.render("human")
  if (seed >= 0):
    env.seed(seed)
  '''
  print("environment details")
  print("env.action_space", env.action_space)
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
  return env
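A short usage sketch for make_env, assuming the environment modules imported elsewhere in this project are available; the environment name and seed are illustrative:

env = make_env("MinitaurBulletEnv", seed=0, render_mode=False)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()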
Example #5
    def __init__(self, env_name='HalfCheetah-v1',
                 policy_params=None,
                 num_workers=32, 
                 num_deltas=320, 
                 deltas_used=320,
                 delta_std=0.02, 
                 logdir=None, 
                 rollout_length=1000,
                 step_size=0.01,
                 shift='constant zero',
                 params=None,
                 seed=123):

        logz.configure_output_dir(logdir)
        logz.save_params(params)
        
        env = minitaur_gym_env.MinitaurBulletEnv() #gym.make(env_name)
        
        self.timesteps = 0
        self.action_size = env.action_space.shape[0]
        self.ob_size = env.observation_space.shape[0]
        self.num_deltas = num_deltas
        self.deltas_used = deltas_used
        self.rollout_length = rollout_length
        self.step_size = step_size
        self.delta_std = delta_std
        self.logdir = logdir
        self.shift = shift
        self.params = params
        self.max_past_avg_reward = float('-inf')
        self.num_episodes_used = float('inf')

        
        # create shared table for storing noise
        print("Creating deltas table.")
        deltas_id = create_shared_noise.remote()

        self.deltas = SharedNoiseTable(ray.get(deltas_id), seed = seed + 3)
        print('Created deltas table.')

        # initialize workers with different random seeds
        print('Initializing workers.') 
        self.num_workers = num_workers
        self.workers = [Worker.remote(seed + 7 * i,
                                      env_name=env_name,
                                      policy_params=policy_params,
                                      deltas=deltas_id,
                                      rollout_length=rollout_length,
                                      delta_std=delta_std) for i in range(num_workers)]

        # initialize policy 
        if policy_params['type'] == 'linear':
            self.policy = LinearPolicy(policy_params)
            self.w_policy = self.policy.get_weights()
        else:
            raise NotImplementedError
            
        # initialize optimization algorithm
        self.optimizer = optimizers.SGD(self.w_policy, self.step_size)        
        print("Initialization of ARS complete.")
Example #6
def ResetPoseExample():
    """
    An example in which the minitaur stands still using the reset pose.
    """
    # How many steps
    steps = 1000

    # Environment randomizer
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())

    # Minitaur gym environment
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=randomizer,
        hard_reset=False)

    # Angle to apply at each step
    action = [math.pi / 2] * 8

    # Run simulation steps
    for _ in range(steps):
        _, _, done, _ = environment.step(action)
        if done:
            break
        # end if
    # end for

    # Reset the environment
    environment.reset()
Example #7
def SinePolicyExample():
    """An example of minitaur walking with a sine gait."""
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 500  #20000
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    speed = 95.76487704867183  #99.95830805709147 #96.03904323742279

    for step_counter in range(steps):
        time_step = 0.5180280589663037  #0.3738710084245188 #0.7118871187955457
        t = step_counter * time_step
        amplitude1 = 0.7958106754045535  #0.6704708050626745 #0.8313325863588368 #amplitude_1_bound
        amplitude2 = 0.5783009482997857  #0.24525131201122005 #0.2804425976578887 #amplitude_2_bound

        steering_amplitude = 0
        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        print(sum_reward)
        if done:
            break
    environment.reset()
Example #8
def MotorOverheatExample():
    """
    An example in which the minitaur motor overheat protection is triggered.
    The minitaur is leaning forward and the motors exceed their threshold
    torques. The overheat protection will be triggered in ~1 sec.
    """
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        motor_overheat_protection=True,
        accurate_motor_model_enabled=True,
        motor_kp=1.20,
        motor_kd=0.00,
        on_rack=False)

    # Action to take at each step (initialized to zeros)
    action = [.0] * 8

    # Fill actions
    for i in range(8):
        action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
    # end for

    # How many steps to simulate
    steps = 500

    # Stock action and observations
    actions_and_observations = []

    # For each step
    for step_counter in range(steps):
        # Matches the internal timestep
        time_step = 0.01

        # Temporal position
        t = step_counter * time_step

        # Time position and action
        current_row = [t]
        current_row.extend(action)

        # Result from action
        observation, _, _, _ = environment.step(action)

        # Add observation to row
        current_row.extend(observation.tolist())

        # Save actions and observations
        actions_and_observations.append(current_row)
    # end for

    # Reset environment
    environment.reset()
Example #9
def SineStandExample():
    """
    An example of minitaur standing and squatting on the floor.
    To validate the accurate motor model, we command the robot to sit and stand up
    periodically in both simulation and experiment. We compare the measured motor
    trajectories, torques and gains.
    """
    # Load the environment
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        motor_overheat_protection=True,
        accurate_motor_model_enabled=True,
        motor_kp=1.20,
        motor_kd=0.02,
        on_rack=False)

    # Number of steps, amplitude and speed
    steps = 1000
    amplitude = 0.5
    speed = 30

    # List to save actions and observations
    actions_and_observations = []

    # Run each step
    for step_counter in range(steps):
        # Matches the internal time step
        time_step = 0.01

        # Time position
        t = step_counter * time_step

        # Add time
        current_row = [t]

        # Action as sine signal
        action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8

        # Add action
        current_row.extend(action)

        # Apply the action and get the results
        observation, _, _, _ = environment.step(action)

        # Add the observation
        current_row.extend(observation.tolist())

        # Append to the list
        actions_and_observations.append(current_row)
    # end for

    environment.reset()
Example #10
def make_env(env_name, seed=-1, render_mode=False):
    if (env_name.startswith("RacecarBulletEnv")):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False,
                                          renders=render_mode)
    elif (env_name.startswith("RocketLander")):
        from box2d.rocket import RocketLander
        env = RocketLander()
    elif (env_name.startswith("BipedalWalker")):
        if (env_name.startswith("BipedalWalkerHardcore")):
            from box2d.biped import BipedalWalkerHardcore
            env = BipedalWalkerHardcore()
        else:
            from box2d.biped import BipedalWalker
            env = BipedalWalker()
    elif (env_name.startswith("MinitaurBulletEnv")):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurDuckBulletEnv")):
        print("bullet_minitaur_duck_started")
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurBallBulletEnv")):
        print("bullet_minitaur_ball_started")
        env = MinitaurBallBulletEnv(render=render_mode)
    elif (env_name.startswith("CartPoleSwingUp")):
        print("cartpole_swingup_started")
        from custom_envs.cartpole_swingup import CartPoleSwingUpEnv
        hardcore = env_name.startswith("CartPoleSwingUpHardcore")
        env = CartPoleSwingUpEnv(hardcore=hardcore)
    elif (env_name.startswith("KukaBulletEnv")):
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    else:
        if env_name.startswith("Roboschool"):
            import roboschool
        env = gym.make(env_name)
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    if (seed >= 0):
        env.seed(seed)
    '''
  print("environment details")
  print("env.action_space", env.action_space)
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
    return env
Example #11
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--num_rollouts', type=int, default=1,
                        help='Number of expert rollouts')
    args = parser.parse_args()

    print('loading and building expert policy')
    lin_policy = np.load(args.expert_policy_file)
    lin_policy = list(lin_policy.items())[0][1]  # wrap in list(); items() is not indexable in Python 3
    
    M = lin_policy[0]
    # mean and std of state vectors estimated online by ARS. 
    mean = lin_policy[1]
    std = lin_policy[2]
    env = minitaur_gym_env.MinitaurBulletEnv(render=True)#gym.make(env_name)        
#    env = gym.make(args.envname)

    returns = []
    observations = []
    actions = []
    for i in range(args.num_rollouts):

        print('iter', i)
        obs = env.reset()
        log_id = pybullet.startStateLogging(pybullet.STATE_LOGGING_VIDEO_MP4, "/usr/local/google/home/jietan/Projects/ARS/data/minitaur{}.mp4".format(i))
        done = False
        totalr = 0.
        steps = 0
        while not done:
            action = np.clip(np.dot(M, (obs - mean)/std), -1.0, 1.0)
            observations.append(obs)
            actions.append(action)

            obs, r, done, _ = env.step(action)
            totalr += r
            steps += 1
            if args.render:
                env.render()
            if steps % 100 == 0: print("%i/%i"%(steps, 1000))
            if steps >= 1000:
                break
        pybullet.stopStateLogging(log_id)
        returns.append(totalr)

    print('returns', returns)
    print('mean return', np.mean(returns))
    print('std of return', np.std(returns))
Example #12
def make_env(env_name, seed=-1, render_mode=False):
    if (env_name.startswith("RacecarBulletEnv")):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False,
                                          renders=render_mode)
    elif (env_name.startswith("MinitaurBulletEnv")):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    else:
        env = gym.make(env_name)
    if render_mode and ("BulletEnv-v0" in env_name):
        env.render("human")
    if (seed >= 0):
        env.seed(seed)

    return env
Example #13
def ResetPoseExample():
    """An example in which the minitaur stands still using the reset pose."""
    steps = 1000
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=randomizer,
        hard_reset=False)
    action = [math.pi / 2] * 8
    for _ in range(steps):
        _, _, done, _ = environment.step(action)
        if done:
            break
    environment.reset()
Example #14
def SinePolicyExample():
    """An example of minitaur walking with a sine gait."""
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 20000
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    speed = 1

    for step_counter in range(steps):
        time_step = 0.01
        t = step_counter * time_step

        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        steering_amplitude = 0
        if t < 10:
            steering_amplitude = 0.1
        elif t < 20:
            steering_amplitude = -0.1
        else:
            steering_amplitude = 0

        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            break
    environment.reset()
Example #15
    def __init__(self, env_seed,
                 env_name='',
                 policy_params = None,
                 deltas=None,
                 rollout_length=1000,
                 delta_std=0.02):

        # initialize OpenAI environment for each worker
        self.env = minitaur_gym_env.MinitaurBulletEnv()#gym.make(env_name)
        self.env.seed(env_seed)
        print ("env seed: {}".format(env_seed))
        # each worker gets access to the shared noise table
        # with independent random streams for sampling
        # from the shared noise table. 
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)
        self.policy_params = policy_params
        if policy_params['type'] == 'linear':
            self.policy = LinearPolicy(policy_params)
        else:
            raise NotImplementedError
            
        self.delta_std = delta_std
        self.rollout_length = rollout_length
Example #16
def SineStandExample():
    """An example of minitaur standing and squatting on the floor.

  To validate the accurate motor model, we command the robot to sit and stand up
  periodically in both simulation and experiment. We compare the measured motor
  trajectories, torques and gains.
  """
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        motor_overheat_protection=True,
        accurate_motor_model_enabled=True,
        motor_kp=1.20,
        motor_kd=0.02,
        on_rack=False)
    steps = 1000
    amplitude = 0.5
    speed = 3

    actions_and_observations = []

    for step_counter in range(steps):
        # Matches the internal timestep.
        time_step = 0.01
        t = step_counter * time_step
        current_row = [t]

        action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
        current_row.extend(action)
        print("Accion: ", action)

        observation, _, _, _ = environment.step(action)
        current_row.extend(observation.tolist())
        actions_and_observations.append(current_row)

    environment.reset()
Example #17
def run_ars(params):

    dir_path = params['dir_path']

    if not(os.path.exists(dir_path)):
        os.makedirs(dir_path)
    logdir = dir_path
    if not(os.path.exists(logdir)):
        os.makedirs(logdir)

    env = minitaur_gym_env.MinitaurBulletEnv() #gym.make(params['env_name'])
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]

    # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1.
    policy_params={'type':'linear',
                   'ob_filter':params['filter'],
                   'ob_dim':ob_dim,
                   'ac_dim':ac_dim}

    ARS = ARSLearner(env_name=params['env_name'],
                     policy_params=policy_params,
                     num_workers=params['n_workers'], 
                     num_deltas=params['n_directions'],
                     deltas_used=params['deltas_used'],
                     step_size=params['step_size'],
                     delta_std=params['delta_std'], 
                     logdir=logdir,
                     rollout_length=params['rollout_length'],
                     shift=params['shift'],
                     params=params,
                     seed = params['seed'])
        
    ARS.train(params['n_iter'])
       
    return 
Example #18
def SinePolicyExample():
    """
    An example of minitaur walking with a sine gait.
    """
    # Random generator
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())

    # Load the minitaur environment
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)

    # Reward summation
    sum_reward = 0

    # How many steps
    steps = 20000

    # Amplitudes
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1

    # Speed parameter
    speed = 10

    # Run each step
    for step_counter in range(steps):
        # Time step
        time_step = 0.01

        # Temporal position
        t = step_counter * time_step

        # Amplitudes
        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        steering_amplitude = 0

        # Forward
        if t < 10:
            steering_amplitude = 0.1
        # Backward
        elif t < 20:
            steering_amplitude = -0.1
        # Nothing
        else:
            steering_amplitude = 0
        # end if

        # Applying asymmetrical sine gaits to different legs can steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2

        # List of actions
        action = [a1, a2, a2, a1, a3, a4, a4, a3]

        # Apply action and get reward
        _, reward, done, _ = environment.step(action)

        # Add to total reward
        sum_reward += reward

        # If done, end
        if done:
            break
        # end if
    # end for

    # Reset the environment
    environment.reset()
Example #19
def make_env(env_name, seed=-1, render_mode=False):
    if (env_name.startswith("RacecarBulletEnv")):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False,
                                          renders=render_mode)
    elif (env_name.startswith("AugmentBipedalWalker")):
        if (env_name.startswith("AugmentBipedalWalkerHardcore")):
            if (env_name.startswith("AugmentBipedalWalkerHardcoreSmallLegs")):
                from box2d.bipedal_walker import AugmentBipedalWalkerHardcoreSmallLegs
                env = AugmentBipedalWalkerHardcoreSmallLegs()
            else:
                from box2d.bipedal_walker import AugmentBipedalWalkerHardcore
                env = AugmentBipedalWalkerHardcore()
        elif (env_name.startswith("AugmentBipedalWalkerSmallLegs")):
            from box2d.bipedal_walker import AugmentBipedalWalkerSmallLegs
            env = AugmentBipedalWalkerSmallLegs()
        elif (env_name.startswith("AugmentBipedalWalkerTallLegs")):
            from box2d.bipedal_walker import AugmentBipedalWalkerTallLegs
            env = AugmentBipedalWalkerTallLegs()
        else:
            from box2d.bipedal_walker import AugmentBipedalWalker
            env = AugmentBipedalWalker()
    elif (env_name.startswith("MinitaurBulletEnv")):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurDuckBulletEnv")):
        print("bullet_minitaur_duck_started")
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurBallBulletEnv")):
        print("bullet_minitaur_ball_started")
        env = MinitaurBallBulletEnv(render=render_mode)
    elif (env_name.startswith("KukaBulletEnv")):
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    else:
        if env_name.startswith("Augment"):
            import robogym
        if env_name.startswith("AugmentAnt"):
            from robogym import AugmentAnt
            env = AugmentAnt()
        elif env_name.startswith("AugmentHopper"):
            from robogym import AugmentHopper
            env = AugmentHopper()
        elif env_name.startswith("AugmentHalfCheetah"):
            from robogym import AugmentHalfCheetah
            env = AugmentHalfCheetah()
        else:
            env = gym.make(env_name)
        if render_mode and not env_name.startswith("Augment"):
            env.render("human")
    if (seed >= 0):
        env.seed(seed)
    '''
  print("environment details")
  print("env.action_space", env.action_space)
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
    return env
Example #20
def make_env(env_name, seed=-1, render_mode=False):
    if (env_name.startswith("RacecarBulletEnv")):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.racecarGymEnv as racecarGymEnv
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False,
                                          renders=render_mode)
    elif (env_name.startswith("RocketLander")):
        from box2d.rocket import RocketLander
        env = RocketLander()
    elif (env_name.startswith("BipedalWalker")):
        if (env_name.startswith("BipedalWalkerHardcore")):
            from box2d.biped import BipedalWalkerHardcore
            env = BipedalWalkerHardcore()
        else:
            from box2d.biped import BipedalWalker
            env = BipedalWalker()
    elif (env_name.startswith("MinitaurBulletEnv")):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurDuckBulletEnv")):
        print("bullet_minitaur_duck_started")
        import pybullet as p
        import pybullet_envs
        from custom_envs.minitaur_duck import MinitaurDuckBulletEnv
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif (env_name.startswith("MinitaurBallBulletEnv")):
        print("bullet_minitaur_ball_started")
        import pybullet as p
        import pybullet_envs
        from custom_envs.minitaur_ball import MinitaurBallBulletEnv
        env = MinitaurBallBulletEnv(render=render_mode)
    elif (env_name.startswith("SlimeVolley")):
        print("slimevolley_swingup_started")
        from custom_envs.slimevolley import SlimeVolleyEnv, SurvivalRewardEnv
        env = SlimeVolleyEnv()
        env = SurvivalRewardEnv(env)  # optional
    elif (env_name.startswith("CartPoleSwingUp")):
        print("cartpole_swingup_started")
        from custom_envs.cartpole_swingup import CartPoleSwingUpEnv
        env = CartPoleSwingUpEnv()
    elif (env_name.startswith("KukaBulletEnv")):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.kukaGymEnv as kukaGymEnv
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    elif (env_name.startswith("NoDeathAntBullet")):
        print("no death bullet ant.")
        import pybullet_envs
        from custom_envs.custom_wrapper import NoDeath
        env = NoDeath(gym.make("AntBulletEnv-v0"))
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    else:
        if env_name.startswith("Roboschool"):
            import roboschool
        if "Bullet" in env_name:
            import pybullet_envs
        env = gym.make(env_name)
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    if (seed >= 0):
        env.seed(seed)
    '''
  print("environment details")
  print("env.action_space", env.action_space)
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
    return env
Example #21
import pybullet_envs.bullet.minitaur_gym_env as e
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy

total_timesteps = 1000000
env = e.MinitaurBulletEnv(render=False)
model = PPO2(MlpPolicy, env=env, verbose=1, tensorboard_log="./tensorboard")
model.learn(total_timesteps=total_timesteps)
model.save("./model/model{}".format(total_timesteps))
print("model training done!")
import sys
sys.path.insert(0, '../..')

import libs_agents
import numpy
import time

import gym
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer

randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                         leg_model_enabled=False,
                                         motor_velocity_limit=numpy.inf,
                                         pd_control_enabled=True,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         env_randomizer=randomizer,
                                         hard_reset=False)


class Wrapper(gym.Wrapper):
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)

    def step(self, action):
        action_ = numpy.pi * (0.1 * action + 0.5)
        return self.env.step(action_)


env = Wrapper(env)
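The wrapper above rescales a normalized action in [-1, 1] to a motor angle between 0.4*pi and 0.6*pi. A minimal sketch of stepping the wrapped environment, assuming the setup code above; the random actions are illustrative:

obs = env.reset()
for _ in range(100):
    # Sample a normalized action; the wrapper maps it to pi/2 +/- 0.1*pi.
    action = numpy.random.uniform(-1.0, 1.0, size=8)
    obs, reward, done, info = env.step(action)
    if done:
        break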
import gym
from ray.tune.registry import register_env  # needed for register_env() below
import ray.rllib.agents.ppo as ppo  # needed for ppo.PPOTrainer below (pre-2.0 Ray API)

# Adapted from https://usermanual.wiki/Document/pybullet20quickstart20guide.479068914/help
import pybullet_envs.bullet.minitaur_gym_env as e
env = e.MinitaurBulletEnv(render=True)


class MultiEnv(gym.Env):
    def __init__(self, env_config):
        # Pick the actual env based on worker and env indexes.
        # choose_env_for is assumed to be defined elsewhere in this project.
        self.env = gym.make(
            choose_env_for(env_config.worker_index, env_config.vector_index))
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

    def reset(self):
        return self.env.reset()

    def step(self, action):
        return self.env.step(action)


register_env("multienv", lambda config: MultiEnv(config))

trainer = ppo.PPOTrainer(env="multienv")
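A minimal training-loop sketch for the trainer above; the iteration count and the printed metric are illustrative, and choose_env_for must be defined for this to actually run:

for _ in range(3):
    result = trainer.train()  # run one RLlib training iteration
    print("episode_reward_mean:", result["episode_reward_mean"])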