def __init__(self,
             ctrl_cost_coeff=1e-2,
             contact_cost_coeff=1e-3,
             survive_reward=5e-2,
             *args,
             **kwargs):
    MultiDirectionBaseEnv.__init__(self,
                                   ctrl_cost_coeff=ctrl_cost_coeff,
                                   contact_cost_coeff=contact_cost_coeff,
                                   survive_reward=survive_reward,
                                   *args,
                                   **kwargs)
    AntEnv.__init__(self, *args, **kwargs)
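For context, this constructor belongs in a class that mixes the two parents via multiple inheritance; a minimal sketch, assuming the enclosing class is named MultiDirectionAntEnv (the name is not shown in the snippet) and that both parent classes are importable from the surrounding module:

# Hypothetical composition; only the __init__ above is given in the snippet.
class MultiDirectionAntEnv(MultiDirectionBaseEnv, AntEnv):
    ...  # the __init__ shown above goes here

env = MultiDirectionAntEnv(ctrl_cost_coeff=1e-2, survive_reward=5e-2)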
Example #2
def run_experiment(variant):
    if variant['env_name'] == 'humanoid-rllab':
        env = normalize(HumanoidEnv())
    elif variant['env_name'] == 'swimmer-rllab':
        env = normalize(SwimmerEnv())
    elif variant['env_name'] == 'ant-rllab':
        env = normalize(AntEnv())
    elif variant['env_name'] == 'BlocksSimpleXYQ-v0':
        target = [-1.0, 0.0]
        env = bsmp.BlocksSimpleXYQ(multi_goal=variant['blocks_multigoal'],
                                   time_limit=variant['max_path_length'],
                                   env_config=variant['blocks_simple_xml'],
                                   goal=target)
        env = env_wrap.obsTupleWrap(env, add_action_to_obs=False)
        env = gym_env.GymEnv(
            env,
            video_schedule=glob.video_scheduler.video_schedule,
            log_dir=".")
    else:
        env = normalize(GymEnv(variant['env_name']))

    pool = SimpleReplayBuffer(env=env,
                              max_replay_buffer_size=variant['max_pool_size'])

    sampler = SimpleSampler(max_path_length=variant['max_path_length'],
                            min_pool_size=variant['max_path_length'],
                            batch_size=variant['batch_size'])

    base_kwargs = dict(epoch_length=variant['epoch_length'],
                       n_epochs=variant['n_epochs'],
                       n_train_repeat=variant['n_train_repeat'],
                       eval_render=False,
                       eval_n_episodes=1,
                       sampler=sampler)

    M = variant['layer_size']
    qf = NNQFunction(env=env, hidden_layer_sizes=(M, M))

    policy = StochasticNNPolicy(env=env, hidden_layer_sizes=(M, M))

    algorithm = SQL(
        base_kwargs=base_kwargs,
        env=env,
        pool=pool,
        qf=qf,
        policy=policy,
        kernel_fn=adaptive_isotropic_gaussian_kernel,
        kernel_n_particles=variant['kernel_particles'],
        kernel_update_ratio=variant['kernel_update_ratio'],
        value_n_particles=variant['value_n_particles'],
        td_target_update_interval=variant['td_target_update_interval'],
        qf_lr=variant['qf_lr'],
        policy_lr=variant['policy_lr'],
        discount=variant['discount'],
        reward_scale=variant['reward_scale'],
        save_full_state=False)

    algorithm.train()
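The keys read above come straight from the function body; a minimal sketch of a matching variant dict, with illustrative values only (they are not taken from the snippet). blocks_multigoal and blocks_simple_xml are only read for the 'BlocksSimpleXYQ-v0' branch and are omitted here:

variant = dict(
    env_name='ant-rllab',          # or 'swimmer-rllab', 'humanoid-rllab', a gym id, ...
    max_path_length=1000,
    max_pool_size=int(1e6),
    batch_size=128,
    epoch_length=1000,
    n_epochs=500,
    n_train_repeat=1,
    layer_size=128,
    kernel_particles=16,
    kernel_update_ratio=0.5,
    value_n_particles=16,
    td_target_update_interval=1000,
    qf_lr=3e-4,
    policy_lr=3e-4,
    discount=0.99,
    reward_scale=1.0,
)
run_experiment(variant)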
Example #3
def create_env(which_agent):

    # setup environment
    if (which_agent == 0):
        env = PointEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 1):
        env = AntEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 2):
        env = SwimmerEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)  # dt 0.001 and frameskip=150
    elif (which_agent == 3):
        env = ReacherEnv()
        dt_from_xml = env.model.opt.timestep
    elif (which_agent == 4):
        env = HalfCheetahEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    # elif (which_agent == 5):
    #     env = RoachEnv()  # this is a personal vrep env
    #     dt_from_xml = env.VREP_DT
    elif (which_agent == 6):
        env = HopperEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 7):
        env = Walker2DEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)

    # get dt value from env - DOES NOT WORK !!!!
    # if (which_agent == 5):
    #     dt_from_xml = env.VREP_DT
    # else:
    #     dt_from_xml = env.model.opt.timestep
    print("\n\n the dt is: ", dt_from_xml, "\n\n")

    #set vars
    tf.set_random_seed(2)
    gym.logger.setLevel(gym.logging.WARNING)
    dimO = env.observation_space.shape
    dimA = env.action_space.shape
    print('--------------------------------- \nState space dimension: ', dimO)
    print('Action space dimension: ', dimA,
          "\n -----------------------------------")

    return env, dt_from_xml
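A usage sketch: pick the agent by index (1 selects the normalized Ant above) and unpack the environment together with the timestep read from its MuJoCo model:

env, dt_from_xml = create_env(which_agent=1)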
Example #4
def run_experiment(variant):
    if variant['env_name'] == 'humanoid-rllab':
        env = normalize(HumanoidEnv())
    elif variant['env_name'] == 'swimmer-rllab':
        env = normalize(SwimmerEnv())
    elif variant['env_name'] == 'ant-rllab':
        env = normalize(AntEnv())
    elif variant['env_name'] == 'sawyer-rllab':
        env = normalize(SawyerTestEnv())
    elif variant['env_name'] == 'arm3Ddisc-rllab':
        env = normalize(Arm3dDiscEnv())
    else:
        env = normalize(GymEnv(variant['env_name']))

    pool = SimpleReplayBuffer(
        env_spec=env.spec, max_replay_buffer_size=variant['max_pool_size'])

    sampler = SimpleSampler(
        max_path_length=variant['max_path_length'],
        min_pool_size=variant['max_path_length'],
        batch_size=variant['batch_size'])

    base_kwargs = dict(
        epoch_length=variant['epoch_length'],
        n_epochs=variant['n_epochs'],
        n_train_repeat=variant['n_train_repeat'],
        eval_render=False,
        eval_n_episodes=1,
        sampler=sampler)

    M = variant['layer_size']
    qf = NNQFunction(env_spec=env.spec, hidden_layer_sizes=(M, M))

    policy = StochasticNNPolicy(env_spec=env.spec, hidden_layer_sizes=(M, M))

    algorithm = SQL(
        base_kwargs=base_kwargs,
        env=env,
        pool=pool,
        qf=qf,
        policy=policy,
        kernel_fn=adaptive_isotropic_gaussian_kernel,
        kernel_n_particles=variant['kernel_particles'],
        kernel_update_ratio=variant['kernel_update_ratio'],
        value_n_particles=variant['value_n_particles'],
        td_target_update_interval=variant['td_target_update_interval'],
        qf_lr=variant['qf_lr'],
        policy_lr=variant['policy_lr'],
        discount=variant['discount'],
        reward_scale=variant['reward_scale'],
        save_full_state=False)

    algorithm.train()
Example #5
    def _setup_world(self, filename):
        """
        Helper method for handling setup of the MuJoCo world.
        Args:
            filename: Path to XML file containing the world information.
        """
        self._world = []
        self._model = []

        # Initialize MuJoCo worlds: this variant ignores `filename` and creates
        # one AntEnv per condition.
        for i in range(self._hyperparams['conditions']):
            self._world.append(AntEnv())
        # Initialize x0.
        self.x0 = []
        for i in range(self._hyperparams['conditions']):
            self.x0.append(self._world[i].reset())
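The only hyperparameter this method reads is 'conditions'; a minimal sketch of how it might be driven, with the agent object and its _hyperparams assumed rather than taken from the snippet:

agent._hyperparams = {'conditions': 4}   # hypothetical: four conditions, one AntEnv each
agent._setup_world(filename=None)        # filename is accepted but unused in this variant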
Example #6
    def __init__(self, args):
        self.args = args
        self.device = (torch.device('cuda')
                       if args.cuda and torch.cuda.is_available()
                       else torch.device('cpu'))

        if self.args.env_name == 'ant':
            from rllab.envs.mujoco.ant_env import AntEnv
            env = AntEnv()
            # set the target velocity direction (for learning sub-policies)
            env.velocity_dir = self.args.velocity_dir
            env.penalty = self.args.penalty

            # use gym environment observation
            env.use_gym_obs = self.args.use_gym_obs
            # use gym environment reward
            env.use_gym_reward = self.args.use_gym_reward

        elif self.args.env_name == 'swimmer':
            from rllab.envs.mujoco.swimmer_env import SwimmerEnv
            env = SwimmerEnv()
            env.velocity_dir = self.args.velocity_dir
        else:
            raise NotImplementedError

        self.env = normalize(env)
        self.reset_env()

        self.obs_shape = self.env.observation_space.shape

        self.actor_critic = self.select_network().to(self.device)
        self.optimizer = self.select_optimizer()

        # list of RolloutStorage objects
        self.episodes_rollout = []
        # concatenation of all episodes' rollout
        self.rollouts = RolloutStorage(self.device)
        # this directory is used for tensorboardX only
        self.writer = SummaryWriter(args.log_dir + self.args.velocity_dir)

        self.episodes = 0
        self.episode_steps = []
        self.train_rewards = []
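The attributes accessed above suggest the shape of args; a minimal sketch using argparse.Namespace, where the field names come from the snippet but the values are illustrative:

from argparse import Namespace

args = Namespace(cuda=False, env_name='ant', velocity_dir='posx', penalty=0.0,
                 use_gym_obs=False, use_gym_reward=False, log_dir='./runs/')
# The enclosing trainer class is not named in the snippet; constructing it with
# these args is left as a hypothetical, e.g. Trainer(args).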
Example #7
def get_env_settings(env_id="", normalize_env=True, gym_name="",
                     env_params=None):
    if env_params is None:
        env_params = {}

    if env_id == 'cart':
        env = CartpoleEnv()
        name = "Cartpole"
    elif env_id == 'cheetah':
        env = HalfCheetahEnv()
        name = "HalfCheetah"
    elif env_id == 'ant':
        env = AntEnv()
        name = "Ant"
    elif env_id == 'point':
        env = gym_env("OneDPoint-v0")
        name = "OneDPoint"
    elif env_id == 'reacher':
        env = gym_env("Reacher-v1")
        name = "Reacher"
    elif env_id == 'idp':
        env = InvertedDoublePendulumEnv()
        name = "InvertedDoublePendulum"
    elif env_id == 'ocm':
        env = OneCharMemory(**env_params)
        name = "OneCharMemory"
    elif env_id == 'gym':
        if gym_name == "":
            raise Exception("Must provide a gym name")
        env = gym_env(gym_name)
        name = gym_name
    else:
        raise Exception("Unknown env: {0}".format(env_id))
    if normalize_env:
        env = normalize(env)
        name += "-normalized"
    return dict(
        env=env,
        name=name,
        was_env_normalized=normalize_env,
    )
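A usage sketch: request the rllab Ant with normalization and unpack the returned dict:

settings = get_env_settings(env_id='ant', normalize_env=True)
env, name = settings['env'], settings['name']   # name == "Ant-normalized"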
Example #8
def create_env(which_agent):

    # setup environment
    if (which_agent == 0):
        env = normalize(PointEnv())
    elif (which_agent == 1):
        env = normalize(AntEnv())
    elif (which_agent == 2):
        env = normalize(SwimmerEnv())  #dt 0.001 and frameskip=150
    elif (which_agent == 3):
        env = gym.make("modified_gym_env:ReacherPyBulletEnv-v1")
    elif (which_agent == 4):
        env = normalize(HalfCheetahEnv())
    elif (which_agent == 5):
        env = RoachEnv()  #this is a personal vrep env
    elif (which_agent == 6):
        env = normalize(HopperEnv())
    elif (which_agent == 7):
        env = normalize(Walker2DEnv())

    #get dt value from env
    if (which_agent == 5):
        dt_from_xml = env.VREP_DT
    elif (which_agent == 3):
        dt_from_xml = 0.02
    else:
        dt_from_xml = env.model.opt.timestep
    print("\n\n the dt is: ", dt_from_xml, "\n\n")

    #set vars
    tf.set_random_seed(2)
    gym.logger.setLevel(logging.WARN)
    dimO = env.observation_space.shape
    dimA = env.action_space.shape
    print('--------------------------------- \nState space dimension: ', dimO)
    print('Action space dimension: ', dimA,
          "\n -----------------------------------")

    return env, dt_from_xml
Example #9
def get(perm):
    name = perm["problem"]
    if name.lower() == "cartpole":
        from rllab.envs.box2d.cartpole_env import CartpoleEnv
        return normalize(CartpoleEnv())

    elif name.lower() == "mountain car height bonus":
        from rllab.envs.box2d.mountain_car_env import MountainCarEnv
        return normalize(MountainCarEnv())

    elif name.lower() == "mountain car":
        from rllab.envs.box2d.mountain_car_env import MountainCarEnv
        return normalize(MountainCarEnv(height_bonus=0))

    elif name.lower() == "gym mountain car":
        from rllab.envs.gym_env import GymEnv
        return normalize(GymEnv("MountainCarContinuous-v0",
                                record_video=False))

    elif name.lower() == "pendulum":
        from rllab.envs.gym_env import GymEnv
        return normalize(GymEnv("Pendulum-v0", record_video=False))

    elif name.lower() == "mujoco double pendulum":
        from rllab.envs.mujoco.inverted_double_pendulum_env import InvertedDoublePendulumEnv
        return normalize(InvertedDoublePendulumEnv())

    elif name.lower() == "double pendulum":
        from rllab.envs.box2d.double_pendulum_env import DoublePendulumEnv
        return normalize(DoublePendulumEnv())

    elif name.lower() == "hopper":
        from rllab.envs.mujoco.hopper_env import HopperEnv
        return normalize(HopperEnv())

    elif name.lower() == "swimmer":
        from rllab.envs.mujoco.swimmer_env import SwimmerEnv
        return normalize(SwimmerEnv())

    elif name.lower() == "2d walker":
        from rllab.envs.mujoco.walker2d_env import Walker2DEnv
        return normalize(Walker2DEnv())

    elif name.lower() == "half cheetah":
        from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv
        return normalize(HalfCheetahEnv())

    elif name.lower() == "ant":
        from rllab.envs.mujoco.ant_env import AntEnv
        return normalize(AntEnv())

    elif name.lower() == "simple humanoid":
        from rllab.envs.mujoco.simple_humanoid_env import SimpleHumanoidEnv
        return normalize(SimpleHumanoidEnv())

    elif name.lower() == "full humanoid":
        from rllab.envs.mujoco.humanoid_env import HumanoidEnv
        return normalize(HumanoidEnv())

    else:
        raise NotImplementedError(f"Environment {name} unknown")
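A usage sketch: the lookup is case-insensitive on perm["problem"] and returns a normalized rllab environment:

env = get({"problem": "Half Cheetah"})   # returns normalize(HalfCheetahEnv())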
Example #10
from rllab.algos.ppo import PPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.mujoco.ant_env import AntEnv
from rllab.envs.normalized_env import normalize
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

env = normalize(AntEnv())

policy = GaussianMLPPolicy(
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = PPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=500,
    n_itr=4000,
    discount=0.99,
    step_size=0.01,
)
algo.train()
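After algo.train() returns, the trained policy can be queried directly; a short sketch using rllab's standard get_action interface:

obs = env.reset()
action, _ = policy.get_action(obs)   # sample an action from the trained Gaussian MLP policy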