def __init__(self,
             ctrl_cost_coeff=1e-2,
             contact_cost_coeff=1e-3,
             survive_reward=5e-2,
             *args, **kwargs):
    MultiDirectionBaseEnv.__init__(
        self,
        ctrl_cost_coeff=ctrl_cost_coeff,
        contact_cost_coeff=contact_cost_coeff,
        survive_reward=survive_reward,
        *args, **kwargs)
    AntEnv.__init__(self, *args, **kwargs)

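# The __init__ above presumably sits inside a class that mixes MultiDirectionBaseEnv with
# AntEnv and initializes each base explicitly instead of relying on super(). A minimal
# sketch of that composition (the class name MultiDirectionAntEnv is an assumption, not
# taken from the original source):
#
# class MultiDirectionAntEnv(MultiDirectionBaseEnv, AntEnv):
#     def __init__(self, ctrl_cost_coeff=1e-2, contact_cost_coeff=1e-3,
#                  survive_reward=5e-2, *args, **kwargs):
#         ...  # body as defined above
#
# env = MultiDirectionAntEnv()  # hypothetical usage
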
def run_experiment(variant):
    if variant['env_name'] == 'humanoid-rllab':
        env = normalize(HumanoidEnv())
    elif variant['env_name'] == 'swimmer-rllab':
        env = normalize(SwimmerEnv())
    elif variant['env_name'] == 'ant-rllab':
        env = normalize(AntEnv())
    elif variant['env_name'] == 'BlocksSimpleXYQ-v0':
        target = [-1.0, 0.0]
        env = bsmp.BlocksSimpleXYQ(multi_goal=variant['blocks_multigoal'],
                                   time_limit=variant['max_path_length'],
                                   env_config=variant['blocks_simple_xml'],
                                   goal=target)
        env = env_wrap.obsTupleWrap(env, add_action_to_obs=False)
        env = gym_env.GymEnv(env,
                             video_schedule=glob.video_scheduler.video_schedule,
                             log_dir=".")
    else:
        env = normalize(GymEnv(variant['env_name']))

    pool = SimpleReplayBuffer(env=env,
                              max_replay_buffer_size=variant['max_pool_size'])
    sampler = SimpleSampler(max_path_length=variant['max_path_length'],
                            min_pool_size=variant['max_path_length'],
                            batch_size=variant['batch_size'])
    base_kwargs = dict(epoch_length=variant['epoch_length'],
                       n_epochs=variant['n_epochs'],
                       n_train_repeat=variant['n_train_repeat'],
                       eval_render=False,
                       eval_n_episodes=1,
                       sampler=sampler)

    M = variant['layer_size']
    qf = NNQFunction(env=env, hidden_layer_sizes=(M, M))
    policy = StochasticNNPolicy(env=env, hidden_layer_sizes=(M, M))

    algorithm = SQL(
        base_kwargs=base_kwargs,
        env=env,
        pool=pool,
        qf=qf,
        policy=policy,
        kernel_fn=adaptive_isotropic_gaussian_kernel,
        kernel_n_particles=variant['kernel_particles'],
        kernel_update_ratio=variant['kernel_update_ratio'],
        value_n_particles=variant['value_n_particles'],
        td_target_update_interval=variant['td_target_update_interval'],
        qf_lr=variant['qf_lr'],
        policy_lr=variant['policy_lr'],
        discount=variant['discount'],
        reward_scale=variant['reward_scale'],
        save_full_state=False)

    algorithm.train()

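# A minimal `variant` dictionary for the run_experiment() above. The keys are exactly the
# ones the function reads; the numeric values and the config file name are illustrative
# placeholders, not the original authors' settings.
example_variant = dict(
    env_name='BlocksSimpleXYQ-v0',
    blocks_multigoal=False,
    blocks_simple_xml='blocks_simple.xml',  # hypothetical config file name
    max_path_length=100,
    max_pool_size=int(1e6),
    batch_size=128,
    epoch_length=1000,
    n_epochs=500,
    n_train_repeat=1,
    layer_size=128,
    kernel_particles=16,
    kernel_update_ratio=0.5,
    value_n_particles=16,
    td_target_update_interval=1000,
    qf_lr=3e-4,
    policy_lr=3e-4,
    discount=0.99,
    reward_scale=1.0,
)
# run_experiment(example_variant)
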
def create_env(which_agent):
    # setup environment
    if (which_agent == 0):
        env = PointEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 1):
        env = AntEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 2):
        env = SwimmerEnv()  # dt 0.001 and frameskip=150
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 3):
        env = ReacherEnv()
        dt_from_xml = env.model.opt.timestep
    elif (which_agent == 4):
        env = HalfCheetahEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    # elif (which_agent == 5):
    #     env = RoachEnv()  # this is a personal vrep env
    #     dt_from_xml = env.VREP_DT
    elif (which_agent == 6):
        env = HopperEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)
    elif (which_agent == 7):
        env = Walker2DEnv()
        dt_from_xml = env.model.opt.timestep
        env = normalize(env)

    # get dt value from env - DOES NOT WORK !!!!
    # if (which_agent == 5):
    #     dt_from_xml = env.VREP_DT
    # else:
    #     dt_from_xml = env.model.opt.timestep
    print("\n\n the dt is: ", dt_from_xml, "\n\n")

    # set vars
    tf.set_random_seed(2)
    gym.logger.setLevel(gym.logging.WARNING)
    dimO = env.observation_space.shape
    dimA = env.action_space.shape
    print('--------------------------------- \nState space dimension: ', dimO)
    print('Action space dimension: ', dimA, "\n -----------------------------------")

    return env, dt_from_xml

def run_experiment(variant):
    if variant['env_name'] == 'humanoid-rllab':
        env = normalize(HumanoidEnv())
    elif variant['env_name'] == 'swimmer-rllab':
        env = normalize(SwimmerEnv())
    elif variant['env_name'] == 'ant-rllab':
        env = normalize(AntEnv())
    elif variant['env_name'] == 'sawyer-rllab':
        env = normalize(SawyerTestEnv())
    elif variant['env_name'] == 'arm3Ddisc-rllab':
        env = normalize(Arm3dDiscEnv())
    else:
        env = normalize(GymEnv(variant['env_name']))

    pool = SimpleReplayBuffer(
        env_spec=env.spec,
        max_replay_buffer_size=variant['max_pool_size'])
    sampler = SimpleSampler(
        max_path_length=variant['max_path_length'],
        min_pool_size=variant['max_path_length'],
        batch_size=variant['batch_size'])
    base_kwargs = dict(
        epoch_length=variant['epoch_length'],
        n_epochs=variant['n_epochs'],
        n_train_repeat=variant['n_train_repeat'],
        eval_render=False,
        eval_n_episodes=1,
        sampler=sampler)

    M = variant['layer_size']
    qf = NNQFunction(env_spec=env.spec, hidden_layer_sizes=(M, M))
    policy = StochasticNNPolicy(env_spec=env.spec, hidden_layer_sizes=(M, M))

    algorithm = SQL(
        base_kwargs=base_kwargs,
        env=env,
        pool=pool,
        qf=qf,
        policy=policy,
        kernel_fn=adaptive_isotropic_gaussian_kernel,
        kernel_n_particles=variant['kernel_particles'],
        kernel_update_ratio=variant['kernel_update_ratio'],
        value_n_particles=variant['value_n_particles'],
        td_target_update_interval=variant['td_target_update_interval'],
        qf_lr=variant['qf_lr'],
        policy_lr=variant['policy_lr'],
        discount=variant['discount'],
        reward_scale=variant['reward_scale'],
        save_full_state=False)

    algorithm.train()

def _setup_world(self, filename):
    """
    Helper method for handling setup of the MuJoCo world.

    Args:
        filename: Path to XML file containing the world information.
    """
    self._world = []
    self._model = []

    # Initialize Mujoco worlds. If there's only one xml file, create a single world
    # object, otherwise create a different world for each condition.
    for i in range(self._hyperparams['conditions']):
        self._world.append(AntEnv())

    # Initialize x0.
    self.x0 = []
    for i in range(self._hyperparams['conditions']):
        self.x0.append(self._world[i].reset())

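# Standalone sketch of the per-condition pattern used in _setup_world() above: one AntEnv
# world and one initial state per condition. The helper name make_worlds is hypothetical;
# the AntEnv import follows the rllab path used elsewhere in this collection.
from rllab.envs.mujoco.ant_env import AntEnv

def make_worlds(num_conditions):
    worlds = [AntEnv() for _ in range(num_conditions)]
    x0 = [world.reset() for world in worlds]  # initial observation per condition
    return worlds, x0
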
def __init__(self, args):
    self.args = args
    self.device = torch.device(
        'cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')

    if self.args.env_name == 'ant':
        from rllab.envs.mujoco.ant_env import AntEnv
        env = AntEnv()
        # set the target velocity direction (for learning sub-policies)
        env.velocity_dir = self.args.velocity_dir
        env.penalty = self.args.penalty
        # use gym environment observation
        env.use_gym_obs = self.args.use_gym_obs
        # use gym environment reward
        env.use_gym_reward = self.args.use_gym_reward
    elif self.args.env_name == 'swimmer':
        from rllab.envs.mujoco.swimmer_env import SwimmerEnv
        env = SwimmerEnv()
        env.velocity_dir = self.args.velocity_dir
    else:
        raise NotImplementedError

    self.env = normalize(env)
    self.reset_env()
    self.obs_shape = self.env.observation_space.shape

    self.actor_critic = self.select_network().to(self.device)
    self.optimizer = self.select_optimizer()

    # list of RolloutStorage objects
    self.episodes_rollout = []
    # concatenation of all episodes' rollouts
    self.rollouts = RolloutStorage(self.device)

    # this directory is used for tensorboardX only
    self.writer = SummaryWriter(args.log_dir + self.args.velocity_dir)

    self.episodes = 0
    self.episode_steps = []
    self.train_rewards = []

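# Sketch of the argparse namespace the constructor above expects. The flag names mirror the
# attribute accesses in __init__; the defaults are illustrative assumptions only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env-name', dest='env_name', default='ant')
parser.add_argument('--velocity-dir', dest='velocity_dir', default='posx')  # assumed value
parser.add_argument('--penalty', type=float, default=0.0)
parser.add_argument('--use-gym-obs', dest='use_gym_obs', action='store_true')
parser.add_argument('--use-gym-reward', dest='use_gym_reward', action='store_true')
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--log-dir', dest='log_dir', default='./logs/')
args = parser.parse_args([])
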
def get_env_settings(env_id="", normalize_env=True, gym_name="", env_params=None):
    if env_params is None:
        env_params = {}

    if env_id == 'cart':
        env = CartpoleEnv()
        name = "Cartpole"
    elif env_id == 'cheetah':
        env = HalfCheetahEnv()
        name = "HalfCheetah"
    elif env_id == 'ant':
        env = AntEnv()
        name = "Ant"
    elif env_id == 'point':
        env = gym_env("OneDPoint-v0")
        name = "OneDPoint"
    elif env_id == 'reacher':
        env = gym_env("Reacher-v1")
        name = "Reacher"
    elif env_id == 'idp':
        env = InvertedDoublePendulumEnv()
        name = "InvertedDoublePendulum"
    elif env_id == 'ocm':
        env = OneCharMemory(**env_params)
        name = "OneCharMemory"
    elif env_id == 'gym':
        if gym_name == "":
            raise Exception("Must provide a gym name")
        env = gym_env(gym_name)
        name = gym_name
    else:
        raise Exception("Unknown env: {0}".format(env_id))

    if normalize_env:
        env = normalize(env)
        name += "-normalized"

    return dict(
        env=env,
        name=name,
        was_env_normalized=normalize_env,
    )

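# Example call to get_env_settings() above: 'cheetah' selects HalfCheetahEnv, which is
# wrapped with normalize() by default, so the returned name carries a "-normalized" suffix.
settings = get_env_settings(env_id='cheetah')
env, name = settings['env'], settings['name']  # name == "HalfCheetah-normalized"
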
def create_env(which_agent):
    # setup environment
    if (which_agent == 0):
        env = normalize(PointEnv())
    elif (which_agent == 1):
        env = normalize(AntEnv())
    elif (which_agent == 2):
        env = normalize(SwimmerEnv())  # dt 0.001 and frameskip=150
    elif (which_agent == 3):
        env = gym.make("modified_gym_env:ReacherPyBulletEnv-v1")
    elif (which_agent == 4):
        env = normalize(HalfCheetahEnv())
    elif (which_agent == 5):
        env = RoachEnv()  # this is a personal vrep env
    elif (which_agent == 6):
        env = normalize(HopperEnv())
    elif (which_agent == 7):
        env = normalize(Walker2DEnv())

    # get dt value from env
    if (which_agent == 5):
        dt_from_xml = env.VREP_DT
    elif (which_agent == 3):
        dt_from_xml = 0.02
    else:
        dt_from_xml = env.model.opt.timestep
    print("\n\n the dt is: ", dt_from_xml, "\n\n")

    # set vars
    tf.set_random_seed(2)
    gym.logger.setLevel(logging.WARN)
    dimO = env.observation_space.shape
    dimA = env.action_space.shape
    print('--------------------------------- \nState space dimension: ', dimO)
    print('Action space dimension: ', dimA, "\n -----------------------------------")

    return env, dt_from_xml

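# Example call to create_env() above: agent index 2 selects the normalized SwimmerEnv and
# the timestep is read back from the underlying MuJoCo model.
env, dt_from_xml = create_env(which_agent=2)
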
def get(perm):
    name = perm["problem"]
    if name.lower() == "cartpole":
        from rllab.envs.box2d.cartpole_env import CartpoleEnv
        return normalize(CartpoleEnv())
    elif name.lower() == "mountain car height bonus":
        from rllab.envs.box2d.mountain_car_env import MountainCarEnv
        return normalize(MountainCarEnv())
    elif name.lower() == "mountain car":
        from rllab.envs.box2d.mountain_car_env import MountainCarEnv
        return normalize(MountainCarEnv(height_bonus=0))
    elif name.lower() == "gym mountain car":
        from rllab.envs.gym_env import GymEnv
        return normalize(GymEnv("MountainCarContinuous-v0", record_video=False))
    elif name.lower() == "pendulum":
        from rllab.envs.gym_env import GymEnv
        return normalize(GymEnv("Pendulum-v0", record_video=False))
    elif name.lower() == "mujoco double pendulum":
        from rllab.envs.mujoco.inverted_double_pendulum_env import InvertedDoublePendulumEnv
        return normalize(InvertedDoublePendulumEnv())
    elif name.lower() == "double pendulum":
        from rllab.envs.box2d.double_pendulum_env import DoublePendulumEnv
        return normalize(DoublePendulumEnv())
    elif name.lower() == "hopper":
        from rllab.envs.mujoco.hopper_env import HopperEnv
        return normalize(HopperEnv())
    elif name.lower() == "swimmer":
        from rllab.envs.mujoco.swimmer_env import SwimmerEnv
        return normalize(SwimmerEnv())
    elif name.lower() == "2d walker":
        from rllab.envs.mujoco.walker2d_env import Walker2DEnv
        return normalize(Walker2DEnv())
    elif name.lower() == "half cheetah":
        from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv
        return normalize(HalfCheetahEnv())
    elif name.lower() == "ant":
        from rllab.envs.mujoco.ant_env import AntEnv
        return normalize(AntEnv())
    elif name.lower() == "simple humanoid":
        from rllab.envs.mujoco.simple_humanoid_env import SimpleHumanoidEnv
        return normalize(SimpleHumanoidEnv())
    elif name.lower() == "full humanoid":
        from rllab.envs.mujoco.humanoid_env import HumanoidEnv
        return normalize(HumanoidEnv())
    else:
        raise NotImplementedError(f"Environment {name} unknown")

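# Example lookup through get() above; problem names are matched case-insensitively.
env = get({"problem": "Half Cheetah"})
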
from rllab.algos.ppo import PPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.mujoco.ant_env import AntEnv
from rllab.envs.normalized_env import normalize
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

env = normalize(AntEnv())

policy = GaussianMLPPolicy(
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = PPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=500,
    n_itr=4000,
    discount=0.99,
    step_size=0.01,
)
algo.train()

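# The same environment/policy/baseline setup also runs with rllab's TRPO; this is a sketch
# assuming the standard rllab TRPO constructor, with hyperparameters mirroring the PPO call
# above rather than tuned values.
from rllab.algos.trpo import TRPO

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=500,
    n_itr=4000,
    discount=0.99,
    step_size=0.01,
)
algo.train()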