# Requires `from filelock import FileLock` and `import xml.etree.ElementTree`
# (in addition to this module's other imports).
def set_weight(self, weight):
    """Rewrites the ball mass in the model XML and rebuilds the simulation."""
    # Concurrency protection: several workers may edit the same XML files.
    lock = FileLock(self.xml_path + '.lock')

    def modify_ball_xml(ball_xml_file):
        et = xml.etree.ElementTree.parse(ball_xml_file)
        # Change the ball weight (mass attribute of the body's geom).
        et.find('body').find('geom').set('mass', "%0.3f" % weight)
        et.write(ball_xml_file)

    with lock:
        modify_ball_xml(self.xml_location1)
        modify_ball_xml(self.xml_location2)
        # Rebuild the simulation from the modified model.
        self.sim_robot = MujocoSimRobot(self.xml_path,
                                        camera_settings=dict(
                                            distance=0.7,
                                            azimuth=-60,
                                            elevation=-50,
                                        ))
    self.sim = self.sim_robot.sim
    self.model = self.sim_robot.model
    self.data = self.sim_robot.data
    self.close()
    self.render()
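# Example usage (a minimal sketch, not part of the original source): `set_weight`
# assumes the environment instance defines `xml_path`, `xml_location1`, and
# `xml_location2`. The subclass name and the mass value below are hypothetical.
#
#     env = BaodingBallsEnv()   # hypothetical subclass exposing the XML paths
#     env.set_weight(0.043)     # rewrite the ball mass to 0.043 and rebuild the sim
#     obs = env.reset()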
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments."""

    def __init__(
            self,
            model_path: str,
            frame_skip: int,
            camera_settings: Optional[Dict] = None,
    ):
        """Initializes a new MuJoCo environment.

        Args:
            model_path: The path to the MuJoCo XML file.
            frame_skip: The number of simulation steps per environment step. On
                hardware this influences the duration of each environment step.
            camera_settings: Settings to initialize the simulation camera. This
                can contain the keys `distance`, `azimuth`, and `elevation`.
        """
        self._seed()
        if not os.path.isfile(model_path):
            raise IOError('[MujocoEnv]: Model path does not exist: {}'.format(
                model_path))
        self.frame_skip = frame_skip

        self.sim_robot = MujocoSimRobot(model_path,
                                        camera_settings=camera_settings)
        self.sim = self.sim_robot.sim
        self.model = self.sim_robot.model
        self.data = self.sim_robot.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array', 'depth_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done

        bounds = self.model.actuator_ctrlrange.copy()
        act_upper = bounds[:, 1]
        act_lower = bounds[:, 0]

        # Define the action and observation spaces.
        # HACK: MJRL is still using gym 0.9.x so we can't provide a dtype.
        try:
            self.action_space = spaces.Box(
                act_lower, act_upper, dtype=np.float32)
            if isinstance(observation, collections.Mapping):
                self.observation_space = spaces.Dict({
                    k: spaces.Box(-np.inf, np.inf, shape=v.shape,
                                  dtype=np.float32)
                    for k, v in observation.items()
                })
            else:
                self.obs_dim = np.sum([
                    o.size for o in observation
                ]) if type(observation) is tuple else observation.size
                self.observation_space = spaces.Box(
                    -np.inf, np.inf, (observation.shape[0],), dtype=np.float32)
        except TypeError:
            # Fallback case for gym 0.9.x
            self.action_space = spaces.Box(act_lower, act_upper)
            assert not isinstance(
                observation, collections.Mapping
            ), 'gym 0.9.x does not support dictionary observation.'
            self.obs_dim = np.sum([
                o.size for o in observation
            ]) if type(observation) is tuple else observation.size
            self.observation_space = spaces.Box(-np.inf, np.inf,
                                                observation.shape)

    def seed(self, seed=None):  # Compatibility with new gym.
        return self._seed(seed)

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """Reset the robot degrees of freedom (qpos and qvel).

        Implement this in each subclass.
        """
        raise NotImplementedError

    # -----------------------------

    def reset(self, mode="train"):  # Compatibility with new gym.
        return self._reset(mode=mode)

    def _reset(self, mode="train"):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]

        for _ in range(n_frames):
            self.sim.step()

            # TODO(michaelahn): Remove this; render should be called separately.
            if self.mujoco_render_frames is True:
                self.mj_render()

    def render(self,
               mode='human',
               width=DEFAULT_RENDER_SIZE,
               height=DEFAULT_RENDER_SIZE,
               camera_id=-1):
        """Renders the environment.

        Args:
            mode: The type of rendering to use.
                - 'human': Renders to a graphical window.
                - 'rgb_array': Returns the RGB image as an np.ndarray.
                - 'depth_array': Returns the depth image as an np.ndarray.
                - 'segmentation': Returns the segmentation image as an
                  np.ndarray.
            width: The width of the rendered image. This only affects offscreen
                rendering.
            height: The height of the rendered image. This only affects
                offscreen rendering.
            camera_id: The ID of the camera to use. By default, this is the
                free camera. If specified, only affects offscreen rendering.
        """
        if mode == 'human':
            self.sim_robot.renderer.render_to_window()
        elif mode == 'rgb_array':
            assert width and height
            return self.sim_robot.renderer.render_offscreen(
                width, height, mode=RenderMode.RGB, camera_id=camera_id)
        elif mode == 'depth_array':
            assert width and height
            return self.sim_robot.renderer.render_offscreen(
                width, height, mode=RenderMode.DEPTH, camera_id=camera_id)
        elif mode == 'segmentation':
            assert width and height
            return self.sim_robot.renderer.render_offscreen(
                width, height, mode=RenderMode.SEGMENTATION,
                camera_id=camera_id)
        else:
            raise NotImplementedError(mode)

    def close(self):
        self.sim_robot.close()

    def mj_render(self):
        """Backwards compatibility with MJRL."""
        self.render(mode='human')

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration',
                         env=None,
                         render=True):
        if env is None:
            env = self
        self.mujoco_render_frames = render

        all_rewards = []
        all_scores_mean = []
        all_scores_last = []
        for ep in range(num_episodes):
            o = env.reset()
            d = False
            t = 0
            rew_for_ep = 0
            scores_for_ep = []
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, info = env.step(a)
                # Some environments return the done flag as a float.
                if type(d) == np.float64:
                    d = not (d == 0)
                t = t + 1
                rew_for_ep += r
                scores_for_ep.append(info['score'])
            all_rewards.append(rew_for_ep)
            all_scores_mean.append(np.mean(scores_for_ep))
            all_scores_last.append(np.mean(scores_for_ep[-5:]))
            print("rollout rew: ", rew_for_ep)

        print("\n\nREW: ", np.mean(all_rewards), ",", np.std(all_rewards))
        print("SCO mean: ", np.mean(all_scores_mean), ",",
              np.std(all_scores_mean), "\n\n")
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print('Episode %d: rendering offline ' % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = time.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                # Flip vertically: offscreen frames come back upside down.
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + '.mp4'
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print('saved', file_name)
            t1 = time.time()
            print('time taken = %f' % (t1 - t0))
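# Example subclass (a minimal sketch, not part of the original module): shows
# the contract a concrete environment must satisfy, i.e. implement `step` and
# `reset_model` on top of MujocoEnv. The class name, XML path, and reward are
# hypothetical; any valid MuJoCo model with at least one actuator would do.
if __name__ == '__main__':

    class ExamplePointMassEnv(MujocoEnv):
        """Toy subclass illustrating the MujocoEnv interface."""

        def __init__(self):
            # frame_skip=5: each environment step advances the sim 5 timesteps.
            super().__init__(model_path='/path/to/point_mass.xml',  # hypothetical
                             frame_skip=5)

        def step(self, action):
            self.do_simulation(action, self.frame_skip)
            obs = self.state_vector()
            reward = -float(np.linalg.norm(obs))  # hypothetical reward
            return obs, reward, False, {'score': reward}

        def reset_model(self):
            # Restore the initial joint positions and velocities.
            self.set_state(self.init_qpos, self.init_qvel)
            return self.state_vector()

    env = ExamplePointMassEnv()
    obs = env.reset()
    obs, reward, done, info = env.step(np.zeros(env.model.nu))
    frame = env.render(mode='rgb_array')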