class MujocoEnv(Env): FILE = None def __init__(self, file_path=None, action_noise=0.0, random_init_state=True): # compile template assert file_path is not None self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.random_init_state = random_init_state if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): if init_state is None: if self.random_init_state: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 else: self.model.data.qpos = self.init_qpos self.model.data.qvel = self.init_qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start: start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim def reset(self, init_state=None): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate([ self.model.data.qpos.flat, self.model.data.qvel.flat ]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self, config=None): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) if config is not None: self.viewer.set_window_pose(config["xpos"], config["ypos"]) self.viewer.set_window_size(config["width"], config["height"]) self.viewer.set_window_title(config["title"]) return self.viewer def render(self, close=False, mode='human', config=None): if mode == 'human': viewer = self.get_viewer(config=config) viewer.loop_once() elif mode == 'rgb_array': viewer = self.get_viewer(config=config) viewer.loop_once() #TODO: figure out purpose of these two lines #TODO Figure out how to run without glfw init? # self.get_viewer(config=config).render() data, width, height = self.get_viewer(config=config).get_image() return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:] if close: self.stop_viewer() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() self.viewer = None def release(self): # temporarily alleviate the issue (but still some leak) from mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError def set_state_tmp(self, state, restore=True): if restore: prev_pos = self.model.data.qpos prev_qvel = self.model.data.qvel prev_ctrl = self.model.data.ctrl prev_act = self.model.data.act qpos, qvel = self.decode_state(state) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.forward() yield if restore: self.model.data.qpos = prev_pos self.model.data.qvel = prev_qvel self.model.data.ctrl = prev_ctrl self.model.data.act = prev_act self.model.forward() def get_param_values(self): return {} def set_param_values(self, values): pass
class MujocoEnv(Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise "Mujoco file not specified" file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) self.model = MjModel(file_path) os.close(tmp_f) else: self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): if init_state is None: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start:start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim @overrides def reset(self, init_state=None, **kwargs): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.model.data.qpos.flat, self.model.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) return self.viewer def render(self): viewer = self.get_viewer() viewer.loop_once() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) from rllab.mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError
class PR2ArmClockEnv(MujocoEnv, Serializable): def __init__(self, target=DEFAULT_TARGET, *args, **kwargs): self.target = target kwargs['file_path'] = mujoco_model_path(FILE) super(PR2ArmClockEnv, self).__init__(*args, **kwargs) Serializable.quick_init(self, locals()) def get_current_obs(self): return np.concatenate([ self.joint_angles(), self.finger_to_target(), ]) @overrides def get_viewer(self): if self.viewer is None: self.viewer = MjViewer( title='Simulate: target = {}'.format(self.target)) self.viewer.start() self.viewer.set_model(self.model) return self.viewer @overrides @property def action_space(self): shape = self.model.actuator_ctrlrange[:, 0].shape lb = np.full(shape, -ACTION_LIMIT) ub = np.full(shape, ACTION_LIMIT) return spaces.Box(lb, ub) def step(self, action): self.forward_dynamics(action) next_obs = self.get_current_obs() distance_to_go = self.finger_to_target_dist() vel_cost = 1e-2 * np.linalg.norm(self.joint_velocities()) reward = -distance_to_go - vel_cost done = self.finger_to_target_dist() < self.target_size() return Step(next_obs, reward, done) def joint_angles(self): return self.model.data.qpos.flat def joint_velocities(self): return self.model.data.qvel.flat def finger_to_target(self): return self._get_geom_pos('finger') - self._get_geom_pos(self.target) def finger_to_target_dist(self): return np.linalg.norm(self.finger_to_target()) def target_size(self): return self._get_geom_size(self.target) def _get_geom_pos(self, geom_name): idx = self.model.geom_names.index(geom_name) return self.model.data.geom_xpos[idx] def _get_geom_size(self, geom_name): idx = self.model.geom_names.index(geom_name) return self.model.geom_size[idx][0]