class MujocoEnv(Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise "Mujoco file not specified" file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) self.model = MjModel(file_path) os.close(tmp_f) else: self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): if init_state is None: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start:start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim @overrides def reset(self, init_state=None, **kwargs): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.model.data.qpos.flat, self.model.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) return self.viewer def render(self): viewer = self.get_viewer() viewer.loop_once() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) from rllab.mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError
class MujocoEnv(gym.Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise NotImplementedError("Mujoco file not specified") file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(suffix=".xml", text=True) with open(file_path, 'w') as f: f.write(content) self.model = load_model_from_path(file_path) os.close(tmp_f) else: self.model = load_model_from_path(file_path) self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self.render_width = 512 self.render_height = 512 self.render_camera = None self.init_qpos = self.sim.data.qpos self.init_qvel = self.sim.data.qvel self.init_qacc = self.sim.data.qacc self.init_ctrl = self.sim.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.frame_skip = 1 self.dcom = None self.current_com = None self.reset() super().__init__() @cached_property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return gym.spaces.Box(lb, ub, dtype=np.float32) @cached_property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return gym.spaces.Box(ub * -1, ub, dtype=np.float32) @property def action_bounds(self): assert isinstance(self.action_space, gym.spaces.Box) return self.action_space.low, self.action_space.high def reset_mujoco(self, init_state=None): self.sim.reset() if init_state is None: self.sim.data.qpos[:] = self.init_qpos + np.random.normal( size=self.init_qpos.shape) * 0.01 self.sim.data.qvel[:] = self.init_qvel + np.random.normal( size=self.init_qvel.shape) * 0.1 self.sim.data.qacc[:] = self.init_qacc self.sim.data.ctrl[:] = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.sim.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start:start + datum_dim] setattr(self.sim.data, datum_name, datum) start += datum_dim @overrides def reset(self, init_state=None): self.reset_mujoco(init_state) self.sim.forward() self.current_com = self.sim.data.subtree_com[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.sim.data cdists = np.copy(self.sim.model.geom_margin).flat for c in self.sim.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, data.cinert.flat, data.cvel.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.sim.data.qpos.flat, self.sim.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.sim.data.qpos, self.sim.data.qvel, self.sim.data.qacc, self.sim.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.sim.data.ctrl[:] = self.inject_action_noise(action) for _ in range(self.frame_skip): self.sim.step() self.sim.forward() new_com = self.sim.data.subtree_com[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(self.sim) return self.viewer def render(self, close=False, mode='human'): if mode == 'human': viewer = self.get_viewer() viewer.render() return None elif mode == 'rgb_array': img = self.sim.render(self.render_width, self.render_height, camera_name=self.render_camera) img = img[::-1, :, :] return img if close: self.stop_viewer() return None def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) functions.mj_deleteModel(self.sim._wrapped) functions.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): return self.data.get_body_xmat(body_name).reshape((3, 3)) def get_body_com(self, body_name): return self.data.get_body_xpos(body_name) def get_body_comvel(self, body_name): return self.data.get_body_xvelp(body_name) def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.sim.data.qpos)) def action_from_key(self, key): raise NotImplementedError
class MujocoEnv(Env): FILE = None def __init__(self, file_path=None, action_noise=0.0, random_init_state=True): # compile template assert file_path is not None self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.random_init_state = random_init_state if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): if init_state is None: if self.random_init_state: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 else: self.model.data.qpos = self.init_qpos self.model.data.qvel = self.init_qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start: start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim def reset(self, init_state=None): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate([ self.model.data.qpos.flat, self.model.data.qvel.flat ]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self, config=None): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) if config is not None: self.viewer.set_window_pose(config["xpos"], config["ypos"]) self.viewer.set_window_size(config["width"], config["height"]) self.viewer.set_window_title(config["title"]) return self.viewer def render(self, close=False, mode='human', config=None): if mode == 'human': viewer = self.get_viewer(config=config) viewer.loop_once() elif mode == 'rgb_array': viewer = self.get_viewer(config=config) viewer.loop_once() #TODO: figure out purpose of these two lines #TODO Figure out how to run without glfw init? # self.get_viewer(config=config).render() data, width, height = self.get_viewer(config=config).get_image() return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:] if close: self.stop_viewer() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() self.viewer = None def release(self): # temporarily alleviate the issue (but still some leak) from mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError def set_state_tmp(self, state, restore=True): if restore: prev_pos = self.model.data.qpos prev_qvel = self.model.data.qvel prev_ctrl = self.model.data.ctrl prev_act = self.model.data.act qpos, qvel = self.decode_state(state) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.forward() yield if restore: self.model.data.qpos = prev_pos self.model.data.qvel = prev_qvel self.model.data.ctrl = prev_ctrl self.model.data.act = prev_act self.model.forward() def get_param_values(self): return {} def set_param_values(self, values): pass