def _get_viewer(self, camera_id):
    if self.viewer is None:
        # Create an offscreen GLFW context once, then cache the render context.
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)
        self.viewer = mujoco_py.MjRenderContextOffscreen(self._env.sim, -1)
        self.viewer_setup(camera_id)
    return self.viewer
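# A minimal sketch, assuming mujoco_py's offscreen rendering API
# (MjRenderContextOffscreen.render / read_pixels). The helper name
# render_offscreen_frame and the frame size are illustrative, not from the
# snippet above; real code should cache the context the way _get_viewer does
# instead of recreating it per frame.
import mujoco_py
import numpy as np


def render_offscreen_frame(sim, width=256, height=256, camera_id=-1):
    ctx = mujoco_py.MjRenderContextOffscreen(sim, -1)  # -1 selects the default device
    ctx.render(width, height, camera_id)
    rgb = ctx.read_pixels(width, height, depth=False)
    return np.flipud(rgb)  # OpenGL returns rows bottom-up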
def __init__(self,
             distance_threshold=0.02,
             n_substeps=N_SUBSTEPS,
             relative_control=True,
             initial_qpos=DEFAULT_INITIAL_QPOS,
             success_multiplier=0.1,
             force_finger=None):
    # Initialize rendering [IMPORTANT].
    from mujoco_py import GlfwContext
    GlfwContext(offscreen=True)  # in newer versions of gym, pass quiet=True to silence this
    self.total_steps = 0
    super().__init__(distance_threshold=distance_threshold,
                     n_substeps=n_substeps,
                     relative_control=relative_control,
                     initial_qpos=initial_qpos,
                     success_multiplier=success_multiplier,
                     force_finger=force_finger)

    # Set hand and background colors.
    self.sim.model.mat_rgba[2] = np.array([16, 18, 35, 255]) / 255
    self.sim.model.mat_rgba[4] = np.array([104, 143, 71, 255]) / 255
    self.sim.model.geom_rgba[48] = np.array([0.5, 0.5, 0.5, 0])

    # Get touch sensor site names and their ids.
    self._touch_sensor_id_site_id = []
    self._touch_sensor_id = []
    for k, v in self.sim.model._sensor_name2id.items():
        if 'robot0:TS_' in k:
            self._touch_sensor_id_site_id.append(
                (v, self.sim.model._site_name2id[k.replace('robot0:TS_', 'robot0:T_')]))
            self._touch_sensor_id.append(v)
def mujocopy_render_hack():
    render_hack = False  # set to True for the bugfix on a bad OpenGL context
    if render_hack:
        print("Setting an offscreen GlfwContext. See mujoco-py issue #390")
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)  # Create a window to init GLFW.
    return
def start(self, lock, train=True):
    if not self.cluster:
        GlfwContext(offscreen=True)  # Create a window to init GLFW.
    self.collectData(train, lock)
def __init__(self, past_frames=4):
    GlfwContext(offscreen=True)
    _FrameBufferEnv.__init__(self, past_frames)
    self._initialized = False
    path_to_xml = os.path.join(os.path.dirname(__file__),
                               'assets/hopper_flexible.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 4)
    utils.EzPickle.__init__(self)
def __init__(self, mode='hard', past_frames=4, l2_penalty=False):
    GlfwContext(offscreen=True)
    _FrameBufferEnv.__init__(self, past_frames)
    self._mode = mode
    self._l2_penalty = l2_penalty
    utils.EzPickle.__init__(self)
    path_to_xml = os.path.join(os.path.dirname(__file__),
                               'assets/custom_reacher_3_link.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
def __init__(self, past_frames=4):
    GlfwContext(offscreen=True)
    _FrameBufferEnv.__init__(self, past_frames)
    self._initialized = False
    path_to_xml = os.path.join(os.path.dirname(__file__),
                               'assets/pusher_human_sim.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
    utils.EzPickle.__init__(self)
    self.reset_model()
def __init__(self, past_frames=4, action_penalties=True):
    GlfwContext(offscreen=True)
    _FrameBufferEnv.__init__(self, past_frames)
    self._initialized = False
    self._action_penalties = action_penalties
    path_to_xml = os.path.join(os.path.dirname(__file__),
                               'assets/custom_half_cheetah.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
    utils.EzPickle.__init__(self)
def __init__(self, past_frames=4):
    GlfwContext(offscreen=True)
    _FrameBufferEnv.__init__(self, past_frames)
    self._initialized = False
    utils.EzPickle.__init__(self)
    self._striked = False
    self._min_strike_dist = np.inf
    self.strike_threshold = 0.1
    path_to_xml = os.path.join(os.path.dirname(__file__),
                               'assets/striker_human_sim.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)  # 5)
def __init__(self, env, headless=True):
    if headless:
        # pylint: disable=import-outside-toplevel
        # This import fails without a valid mujoco license, so keep it here
        # to avoid unnecessarily requiring a mujoco license every time the
        # wrappers package is accessed.
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)
    env.reset()
    env = gymWrapper(env)
    super().__init__(env)
    self._observation_space = env.observation_space['pixels']
def __init__(self,
             size=(32, 32),
             color_permutation=[0, 1, 2],
             smoothing_factor=0.0,
             past_frames=4,
             not_done=True):
    GlfwContext(offscreen=True)
    self._size = size
    self._not_done = not_done
    self._color_permutation = color_permutation
    self._smooth = 1.0 - smoothing_factor
    _FrameBufferEnv.__init__(self, past_frames)
    utils.EzPickle.__init__(self)
    mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
def __init__(self,
             size=(32, 32),
             color_permutation=[0, 1, 2],
             smoothing_factor=0.0,
             past_frames=4,
             not_done=True):
    GlfwContext(offscreen=True)
    self._size = size
    self._not_done = not_done
    self._failure = False
    self._color_permutation = color_permutation
    self._smooth = 1.0 - smoothing_factor
    _FrameBufferEnv.__init__(self, past_frames)
    utils.EzPickle.__init__(self)
    path_to_xml = os.path.join(
        os.path.dirname(__file__),
        'assets/custom_inverted_double_pendulum.xml')
    mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
import argparse
import sys

import torch

from lifelong_rl.envs.env_processor import make_env
import lifelong_rl.torch.pytorch_util as ptu
from lifelong_rl.util.visualize_mujoco import record_mujoco_video_from_states

from mujoco_py import GlfwContext

GlfwContext(offscreen=True)  # Create a window to init GLFW.

"""
Visualize the replay buffer of an agent and store it as .mp4
"""


def get_env_states(snapshot_name):
    with open(snapshot_name + '.pt', 'rb') as f:
        snapshot = torch.load(f, map_location='cpu')
    env_states = snapshot['replay_buffer/env_states']
    return env_states


parser = argparse.ArgumentParser()
parser.add_argument(
    '--snapshot', '-name', type=str,
    help='Name of snapshot to visualize (ex. 12-07-hopper/run_1/itr_999)')
parser.add_argument('--env', type=str,
def __init__(self,
             model_path,
             target_position,
             target_rotation,
             target_position_range,
             reward_type,
             initial_qpos={},
             randomize_initial_position=True,
             randomize_initial_rotation=True,
             distance_threshold=0.01,
             rotation_threshold=0.1,
             n_substeps=N_SUBSTEPS,
             relative_control=True,
             ignore_z_target_rotation=False,
             touch_visualisation="off",
             touch_get_obs="sensordata",
             visual_input: bool = False,
             max_steps=100):
    """Initializes a new Hand manipulation environment with touch sensors.

    Args:
        touch_visualisation (string): how touch sensor sites are visualised
            - "on_touch": shows touch sensor sites only when touch values > 0
            - "always": always shows touch sensor sites
            - "off" or else: does not show touch sensor sites
        touch_get_obs (string): touch sensor readings
            - "boolean": returns 1 if touch sensor reading != 0.0, else 0
            - "sensordata": returns original touch sensor readings from self.sim.data.sensordata[id]
            - "log": returns log(x + 1) touch sensor readings from self.sim.data.sensordata[id]
            - "off" or else: does not add touch sensor readings to the observation
        visual_input (bool): whether the environment should return frames (True) or the exact object position (False)
        max_steps (int): maximum number of steps before the episode is ended
    """
    if visual_input:
        # Initialize rendering [IMPORTANT].
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)  # in newer versions of gym, pass quiet=True to silence this

    self.touch_visualisation = touch_visualisation
    self.touch_get_obs = touch_get_obs
    self.visual_input = visual_input
    self.touch_color = [1, 0, 0, 0.5]
    self.notouch_color = [0, 0.5, 0, 0.2]
    self.total_steps = 0
    self.max_steps = max_steps

    manipulate.ManipulateEnv.__init__(
        self,
        model_path,
        target_position,
        target_rotation,
        target_position_range,
        reward_type,
        initial_qpos=initial_qpos,
        randomize_initial_position=randomize_initial_position,
        randomize_initial_rotation=randomize_initial_rotation,
        distance_threshold=distance_threshold,
        rotation_threshold=rotation_threshold,
        n_substeps=n_substeps,
        relative_control=relative_control,
        ignore_z_target_rotation=ignore_z_target_rotation,
    )

    self._touch_sensor_id_site_id = []
    self._touch_sensor_id = []
    # Get touch sensor site names and their ids.
    for k, v in self.sim.model._sensor_name2id.items():
        if 'robot0:TS_' in k:
            self._touch_sensor_id_site_id.append(
                (v, self.sim.model._site_name2id[k.replace('robot0:TS_', 'robot0:T_')]))
            self._touch_sensor_id.append(v)

    # Set touch sensor rgba values.
    if self.touch_visualisation == 'off':
        for _, site_id in self._touch_sensor_id_site_id:
            self.sim.model.site_rgba[site_id][3] = 0.0
    elif self.touch_visualisation == 'always':
        pass

    # Set hand and background colors.
    self.sim.model.mat_rgba[2] = np.array([16, 18, 35, 255]) / 255
    self.sim.model.mat_rgba[4] = np.array([104, 143, 71, 255]) / 255
    self.sim.model.geom_rgba[48] = np.array([0.5, 0.5, 0.5, 0])

    # Set observation space.
    self.observation_space = self._determine_observation_space()
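# A minimal sketch of how the touch_get_obs modes documented above would map
# raw readings to observation entries; the helper name _get_touch_obs is an
# assumption, not part of the original class.
def _get_touch_obs(self):
    readings = self.sim.data.sensordata[self._touch_sensor_id]
    if self.touch_get_obs == 'sensordata':
        return readings  # raw touch sensor values
    if self.touch_get_obs == 'boolean':
        return (readings != 0.0).astype(np.float32)  # contact / no contact
    if self.touch_get_obs == 'log':
        return np.log(readings + 1.0)  # compress large contact forces
    return np.empty(0)  # "off" or anything else: contribute nothing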
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--expert_policy_file', '-epf', type=str,
                        required=True)  # relative to where you're running this script from
    parser.add_argument('--expert_data', '-ed', type=str,
                        required=True)  # relative to where you're running this script from
    parser.add_argument('--env_name', '-env', type=str,
                        help='choices: Ant-v2, Humanoid-v2, Walker2d-v2, HalfCheetah-v2, Hopper-v2',
                        required=True)
    parser.add_argument('--exp_name', '-exp', type=str,
                        default='pick an experiment name', required=True)
    parser.add_argument('--do_dagger', action='store_true')
    parser.add_argument('--ep_len', type=int)
    parser.add_argument('--num_agent_train_steps_per_iter', type=int,
                        default=1000)  # number of gradient steps for training the policy (per iter in n_iter)
    parser.add_argument('--n_iter', '-n', type=int, default=1)
    parser.add_argument('--batch_size', type=int,
                        default=1000)  # training data collected (in the env) during each iteration
    parser.add_argument('--eval_batch_size', type=int,
                        default=1000)  # eval data collected (in the env) for logging metrics
    parser.add_argument('--train_batch_size', type=int,
                        default=100)  # number of sampled data points used per gradient/train step
    parser.add_argument('--n_layers', type=int, default=2)  # depth of the policy to be learned
    parser.add_argument('--size', type=int, default=64)  # width of each layer of the policy to be learned
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)  # LR for supervised learning
    parser.add_argument('--video_log_freq', type=int, default=5)
    parser.add_argument('--scalar_log_freq', type=int, default=1)
    parser.add_argument('--no_gpu', '-ngpu', action='store_true')
    parser.add_argument('--which_gpu', type=int, default=0)
    parser.add_argument('--max_replay_buffer_size', type=int, default=1000000)
    parser.add_argument('--save_params', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()

    # Convert args to a dictionary.
    params = vars(args)

    ##################################
    ### CREATE DIRECTORY FOR LOGGING
    ##################################

    if args.do_dagger:
        # Use this prefix when submitting. The auto-grader uses this prefix.
        logdir_prefix = 'q2_'
        assert args.n_iter > 1, (
            'DAgger needs more than 1 iteration (n_iter > 1) of training, to iteratively '
            'query the expert and train (after first warmstarting from behavior cloning).')
    else:
        # Use this prefix when submitting. The auto-grader uses this prefix.
        logdir_prefix = 'q1_'
        assert args.n_iter == 1, (
            'Vanilla behavior cloning collects expert data just once (n_iter = 1)')

    # Directory for logging.
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data')
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    logdir = logdir_prefix + args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(data_path, logdir)
    params['logdir'] = logdir
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    ###################
    ### RUN TRAINING
    ###################

    GlfwContext(offscreen=True)  # Create a window to init GLFW.
    trainer = BC_Trainer(params)
    trainer.run_training_loop()
def main():
    args = parse_rl_args()
    logging.basicConfig(level=args.log_level)

    # Set a random seed used in PFRL.
    utils.set_random_seed(args.seed)

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print("Output files are saved in {}".format(args.outdir))

    def make_ant_env(idx, test):
        # Use different seeds for train vs test envs.
        process_seed = int(process_seeds[idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        # env_seed = np.random.randint(0, 2**32 - 1) if not test else process_seed
        print(env_seed)
        utils.set_random_seed(env_seed)
        # Create the ant environment with a goal.
        env = AntEnvWithGoal(create_maze_env(args.env), args.env, env_subgoal_dim=15)
        env.seed(int(env_seed))
        if args.render:
            env = pfrl.wrappers.GymLikeEnvRender(env, mode='human')
        return env

    def make_batch_ant__env(test):
        return pfrl.envs.MultiprocessVectorEnv([
            functools.partial(make_ant_env, idx, test)
            for idx in range(args.num_envs)
        ])

    eval_env = make_ant_env(0, test=True)
    env_state_dim = eval_env.state_dim
    env_action_dim = eval_env.action_dim
    env_subgoal_dim = eval_env.subgoal_dim  # determined from the ant env
    if args.env == 'AntMaze' or args.env == 'AntPush':
        env_goal_dim = 2
    else:
        env_goal_dim = 3

    action_space = eval_env.action_space
    subgoal_space = eval_env.subgoal_space
    scale_low = action_space.high * np.ones(env_action_dim)
    scale_high = subgoal_space.high * np.ones(env_subgoal_dim)

    def low_level_burnin_action_func():
        """Select random actions until the model has been updated one or more times."""
        return np.random.uniform(action_space.low, action_space.high).astype(np.float32)

    def high_level_burnin_action_func():
        """Select random actions until the model has been updated one or more times."""
        return np.random.uniform(subgoal_space.low, subgoal_space.high).astype(np.float32)

    gpu = 0 if torch.cuda.is_available() else None

    agent = HIROAgent(state_dim=env_state_dim,
                      action_dim=env_action_dim,
                      goal_dim=env_goal_dim,
                      subgoal_dim=env_subgoal_dim,
                      high_level_burnin_action_func=high_level_burnin_action_func,
                      low_level_burnin_action_func=low_level_burnin_action_func,
                      scale_low=scale_low,
                      scale_high=scale_high,
                      buffer_size=200000,
                      subgoal_freq=10,
                      train_freq=10,
                      reward_scaling=0.1,
                      goal_threshold=5,
                      gpu=gpu,
                      add_entropy_layer=args.add_entropy_layer,
                      soft_subgoal_update=args.soft_subgoal_update,
                      temperature_high=args.temperature_high,
                      temperature_low=args.temperature_low,
                      optimize_high_temp=args.optimize_high_temp,
                      optimize_low_temp=args.optimize_low_temp)

    if args.load:
        # Load weights from a file if the arg is supplied.
        agent.load(args.load)

    if args.record:
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            video_outdir=args.outdir,
            step_number=-1 if args.record else None  # just a non-None object to enable recording
        )
        print("n_runs: {} mean: {} median: {} stdev {}".format(
            args.eval_n_runs,
            eval_stats["mean"],
            eval_stats["median"],
            eval_stats["stdev"],
        ))
    else:
        # Train the hierarchical agent.
        experiments.train_hrl_agent_with_evaluation(
            agent=agent,
            env=make_ant_env(0, test=False),
            steps=args.steps,
            outdir=args.outdir,
            eval_n_steps=None,
            eval_interval=5000,
            eval_n_episodes=10,
            use_tensorboard=True,
            record=args.record)
from torch import device
from mujoco_py.generated import const
from mujoco_py import GlfwContext
import numpy as np
import cv2

GlfwContext(offscreen=True)


class Play:
    def __init__(self, env, agent, env_name, max_episode=1):
        self.env = env
        self.max_episode = max_episode
        self.agent = agent
        _, self.state_rms_mean, self.state_rms_var = self.agent.load_weights()
        self.agent.set_to_eval_mode()
        self.device = device("cpu")
        self.fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.VideoWriter = cv2.VideoWriter(env_name + ".avi", self.fourcc, 50.0, (250, 250))

    def evaluate(self):
        for _ in range(self.max_episode):
            s = self.env.reset()
            episode_reward = 0
            for _ in range(self.env._max_episode_steps):
                # Normalize the state with the running mean/variance, then clip.
                s = np.clip((s - self.state_rms_mean) / (self.state_rms_var ** 0.5 + 1e-8), -5.0, 5.0)
                dist = self.agent.choose_dist(s)
                action = dist.sample().cpu().numpy()[0]
                s_, r, done, _ = self.env.step(action)
                episode_reward += r
                if done:
                    break
                s = s_
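# A minimal sketch of a frame-capture step that could sit inside the inner
# loop of Play.evaluate above, assuming a gym-style rgb_array render mode;
# write_frame is an illustrative helper, not part of the original class.
import cv2


def write_frame(env, video_writer, size=(250, 250)):
    frame = env.render(mode='rgb_array')  # H x W x 3, RGB
    frame = cv2.resize(frame, size)       # match the VideoWriter frame size
    video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # cv2 expects BGR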
def main():
    args = parse_rl_args()
    cfg = YAML().load(
        open(os.environ["FLIGHTMARE_PATH"] + "/flightlib/configs/vec_env.yaml", 'r'))

    if not args.train:
        cfg["env"]["num_envs"] = 1
        cfg["env"]["num_threads"] = 1
    if args.render:
        cfg["env"]["render"] = "yes"
    else:
        cfg["env"]["render"] = "no"

    logging.basicConfig(level=args.log_level)

    # Set a random seed used in PFRL.
    utils.set_random_seed(args.seed)

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print("Output files are saved in {}".format(args.outdir))

    def make_drone_env(cfg, idx, test):
        # Use different seeds for train vs test envs.
        process_seed = int(process_seeds[idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        # env_seed = np.random.randint(0, 2**32 - 1) if not test else process_seed
        utils.set_random_seed(env_seed)
        # Create the drone environment with a goal.
        env = wrapper.FlightEnvVec(
            QuadrotorGoalConditionedEnv_v1(dump(cfg, Dumper=RoundTripDumper), False))
        env.seed(env_seed)
        if args.render:
            env = pfrl.wrappers.GymLikeEnvRender(env)
        return env

    eval_env = make_drone_env(cfg, 0, test=True)
    env_state_dim = eval_env.state_dim
    env_action_dim = eval_env.action_dim
    env_subgoal_dim = 12
    # env_subgoal_dim = eval_env.subgoal_dim
    env_goal_dim = eval_env.obs_dim

    action_space = eval_env.action_space
    subgoal_space = eval_env.subgoal_space
    scale_low = action_space.high * np.ones(env_action_dim)
    # Create the subgoal space in the env!
    scale_high = subgoal_space.high * np.ones(env_subgoal_dim)
    print(action_space.high, action_space.low)

    def low_level_burnin_action_func():
        """Select random actions until the model has been updated one or more times."""
        return np.random.uniform(action_space.low, action_space.high).astype(np.float32)

    def high_level_burnin_action_func():
        """Select random actions until the model has been updated one or more times."""
        return np.random.uniform(subgoal_space.low, subgoal_space.high).astype(np.float32)

    gpu = 0 if torch.cuda.is_available() else None

    agent = HIROAgent(state_dim=env_state_dim,
                      action_dim=env_action_dim,
                      goal_dim=env_goal_dim,
                      subgoal_dim=env_subgoal_dim,
                      high_level_burnin_action_func=high_level_burnin_action_func,
                      low_level_burnin_action_func=low_level_burnin_action_func,
                      scale_low=scale_low,
                      scale_high=scale_high,
                      buffer_size=200000,
                      subgoal_freq=10,
                      train_freq=10,
                      reward_scaling=0.1,
                      goal_threshold=5,
                      gpu=gpu,
                      add_entropy=args.add_entropy)
    print(args.add_entropy)

    if args.load:
        # Load weights from a file if the arg is supplied.
        agent.load(args.load)

    if args.record:
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            video_outdir=args.outdir,
            step_number=-1 if args.record else None  # just a non-None object to enable recording
        )
        print("n_runs: {} mean: {} median: {} stdev {}".format(
            args.eval_n_runs,
            eval_stats["mean"],
            eval_stats["median"],
            eval_stats["stdev"],
        ))
    else:
        # Train the hierarchical agent.
        experiments.train_hrl_agent_with_evaluation(
            agent=agent,
            env=make_drone_env(cfg, 0, test=False),
            steps=args.steps,
            outdir=args.outdir,
            eval_n_steps=None,
            eval_interval=5000,
            eval_n_episodes=10,
            use_tensorboard=True,
            record=args.record)