Ejemplo n.º 1
0
class HalfCheetahSimulator(BaseSimulator):
    def __init__(self, **kwargs):
        super(HalfCheetahSimulator, self).__init__(**kwargs)

        self._imSz = 512
        self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8)

        self.model = MjModel(
            '../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml')
        self.viewer = None
        self._pos = {}
        self._pos['torso'] = np.zeros((3, ))
        self._range_min = -1
        self._range_max = 1

        self.body_comvel = 0
        self.action = np.zeros((1, 2))
        self.init_qpos = self.model.data.qpos
        self.init_qvel = self.model.data.qvel
        self.init_qacc = self.model.data.qacc
        self.init_ctrl = self.model.data.ctrl
        self.frame_skip = 1

    @overrides
    def step(self, ctrl, loop=False):
        ctrl = np.clip(
            ctrl,
            *(np.array([-1, -1, -1, -1, -1, -1]), np.array([1, 1, 1, 1, 1,
                                                            1])))
        self.model.data.ctrl = ctrl  # + np.random.normal(size=ctrl.shape)
        # print('gym', self.model.data.ctrl)
        for i in range(self.frame_skip):
            self.model.step()
        self.model.forward()
        ind = self.model.body_names.index('torso')
        self._pos['torso'] = self.model.body_pos[ind]
        self.body_comvel = self.model.body_comvels[ind]
        self.action = ctrl

    @overrides
    def get_image(self):
        data, width, height = self.viewer.get_image()
        self._im = np.fromstring(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        # print(self._im)
        return self._im.copy()

    @overrides
    def _setup_renderer(self):
        self.viewer = MjViewer(visible=True,
                               init_width=self._imSz,
                               init_height=self._imSz)
        self.viewer.start()
        self.viewer.set_model(self.model)

    @overrides
    def render(self):
        self.viewer.loop_once()
class ReacherTwoEnv(MujocoEnv, Serializable):

    FILE = 'reacher.xml'

    def __init__(self, *args, **kwargs):
        super(ReacherTwoEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    def step(self, a):
        vec = self.get_body_com("fingertip") - self.get_body_com("target")
        reward_dist = -np.linalg.norm(vec)
        reward_ctrl = 0  #- np.square(a).sum()
        #reward_close = 0.01*math.log(-reward_dist)
        reward = reward_dist + reward_ctrl  #+ reward_close
        self.forward_dynamics(a)
        next_obs = self.get_current_obs()
        return Step(next_obs, reward, False)
        #done = False
        #return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(init_width=25, init_height=25)
            self.viewer.start()
            self.viewer.set_model(self.model)
        return self.viewer

    def reset_mujoco(self, init_state=None):
        qpos = np.random.uniform(low=-0.1, high=0.1,
                                 size=self.model.nq) + self.init_qpos.flat
        #while True:
        #    self.goal = np.random.uniform(low=-.2, high=.2, size=2)
        #    if np.linalg.norm(self.goal) < 2: break
        self.goal = np.array([0.1, 0.1])
        qpos[-2:] = self.goal
        qvel = self.init_qvel.flat + np.random.uniform(
            low=-.005, high=.005, size=self.model.nv)
        qvel[-2:] = 0
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model.data.qacc = self.init_qacc
        self.model.data.ctrl = self.init_ctrl
        return self.get_current_obs()

    def get_current_obs(self):
        theta = self.model.data.qpos.flat[:2]
        return np.concatenate([
            np.cos(theta),
            np.sin(theta), self.model.data.qpos.flat[2:],
            self.model.data.qvel.flat[:2],
            self.get_body_com("fingertip") - self.get_body_com("target")
        ])

    reset_trial = reset_mujoco  # shortcut for compatibility.
Ejemplo n.º 3
0
class BlockEnv(MujocoEnv, Serializable):
    global FILE

    def __init__(self, *args, **kwargs):
        FILE = './block_stack_{}.xml'.format(num_blocks)
        # FILE = './block_stack_2.xml'
        super(BlockEnv, self).__init__(*args, **kwargs, file_path=FILE)
        Serializable.quick_init(self, locals())

    def reset(self, init_state=None):
        """
        :param init_state: (2, 2) positions for the first and the second block.
        :return: set geom_xpos
        """
        if init_state is None:
            init_state = (np.random.rand(num_blocks, 2) * loc) - (loc / 2)
        init_pos = np.zeros((num_blocks * 3))
        init_pos[:2] = init_state[0] - self.model.body_pos[1, :2]
        init_pos[3:5] = init_state[1] - self.model.body_pos[2, :2]
        full_state = np.concatenate([
            init_pos,
            self.init_qvel.squeeze(),
            self.init_qacc.squeeze(),
            self.init_ctrl.squeeze()
        ])
        obs = super(BlockEnv, self).reset(init_state=full_state)
        return obs

    def get_current_obs(self):
        return self.model.data.geom_xpos

    def viewer_setup(self, config=None):
        viewer = self.get_viewer()
        viewer.cam.trackbodyid = 0
        viewer.cam.distance = 2.75
        viewer.cam.elevation = -60

    def step(self, action):
        done = False
        reward = 0
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()

        return Step(next_obs, reward, done)

    def step_only(self, action):
        next_state = self.get_current_obs()[1:, :2] + action.reshape(2, 2)
        return self.reset(init_state=next_state)

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(visible=False)
            self.viewer.start()
            self.viewer.set_model(self.model)
        return self.viewer
Ejemplo n.º 4
0
class InvertedPendulumTwoEnv(MujocoEnv, Serializable):

    FILE = 'inverted_pend_two.xml'

    def __init__(self, *args, **kwargs):
        super(InvertedPendulumTwoEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    def step(self, a):
        reward = 1.0
        self.forward_dynamics(a)
        ob = self.get_current_obs()
        notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
        #done = not notdone
        done = False
        if not notdone:
            reward = 0
        return Step(ob, reward, done)

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(init_width=50, init_height=50)
            self.viewer.start()
            self.viewer.set_model(self.model)
            #self.viewer.cam.elevation = -42.59999990463257
        return self.viewer

    def reset_mujoco(self, init_state=None):
        qpos = self.init_qpos + np.random.uniform(
            size=self.model.nq, low=-0.01, high=0.01)
        qvel = self.init_qvel + np.random.uniform(
            size=self.model.nv, low=-0.01, high=0.01)
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model.data.qacc = self.init_qacc
        self.model.data.ctrl = self.init_ctrl
        return self.get_current_obs()

    reset_trial = reset_mujoco

    def get_current_obs(self):
        return np.concatenate([self.model.data.qpos,
                               self.model.data.qvel]).ravel()

    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = v.model.stat.extent
Ejemplo n.º 5
0
class MujocoEnv(Env):
    FILE = None

    @autoargs.arg('action_noise', type=float,
                  help='Noise added to the controls, which will be '
                       'proportional to the action bounds')
    def __init__(self, action_noise=0.0, file_path=None, template_args=None):
        # compile template
        if file_path is None:
            if self.__class__.FILE is None:
                raise "Mujoco file not specified"
            file_path = osp.join(MODEL_DIR, self.__class__.FILE)
        if file_path.endswith(".mako"):
            lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
            with open(file_path) as template_file:
                template = mako.template.Template(
                    template_file.read(), lookup=lookup)
            content = template.render(
                opts=template_args if template_args is not None else {},
            )
            tmp_f, file_path = tempfile.mkstemp(text=True)
            with open(file_path, 'w') as f:
                f.write(content)
            self.model = MjModel(file_path)
            os.close(tmp_f)
        else:
            self.model = MjModel(file_path)
        self.data = self.model.data
        self.viewer = None
        self.init_qpos = self.model.data.qpos
        self.init_qvel = self.model.data.qvel
        self.init_qacc = self.model.data.qacc
        self.init_ctrl = self.model.data.ctrl
        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size
        self.action_noise = action_noise
        if "frame_skip" in self.model.numeric_names:
            frame_skip_id = self.model.numeric_names.index("frame_skip")
            addr = self.model.numeric_adr.flat[frame_skip_id]
            self.frame_skip = int(self.model.numeric_data.flat[addr])
        else:
            self.frame_skip = 1
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    @overrides
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    @overrides
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.bounds

    def reset_mujoco(self, init_state=None):
        if init_state is None:
            self.model.data.qpos = self.init_qpos + \
                                   np.random.normal(size=self.init_qpos.shape) * 0.01
            self.model.data.qvel = self.init_qvel + \
                                   np.random.normal(size=self.init_qvel.shape) * 0.1
            self.model.data.qacc = self.init_qacc
            self.model.data.ctrl = self.init_ctrl
        else:
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.model.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start: start + datum_dim]
                setattr(self.model.data, datum_name, datum)
                start += datum_dim

    @overrides
    def reset(self, init_state=None):
        self.reset_mujoco(init_state)
        self.model.forward()
        self.current_com = self.model.data.com_subtree[0]
        self.dcom = np.zeros_like(self.current_com)
        return self.get_current_obs()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.model.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.model.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate([
            self.model.data.qpos.flat,
            self.model.data.qvel.flat
        ])

    @property
    def _full_state(self):
        return np.concatenate([
            self.model.data.qpos,
            self.model.data.qvel,
            self.model.data.qacc,
            self.model.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        self.model.data.ctrl = self.inject_action_noise(action)
        for _ in range(self.frame_skip):
            self.model.step()
        self.model.forward()
        new_com = self.model.data.com_subtree[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer()
            self.viewer.start()
            self.viewer.set_model(self.model)
        return self.viewer

    def render(self):
        viewer = self.get_viewer()
        viewer.loop_once()

    def start_viewer(self):
        viewer = self.get_viewer()
        if not viewer.running:
            viewer.start()

    def stop_viewer(self):
        if self.viewer:
            self.viewer.finish()

    def release(self):
        # temporarily alleviate the issue (but still some leak)
        from rllab.mujoco_py.mjlib import mjlib
        mjlib.mj_deleteModel(self.model._wrapped)
        mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.xmat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.com_subtree[idx]

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.body_comvels[idx]

    def print_stats(self):
        super(MujocoEnv, self).print_stats()
        print("qpos dim:\t%d" % len(self.model.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError
Ejemplo n.º 6
0
class SwimmerUnevenFloorEnv(MujocoEnv, Serializable):
    """
    This SwimmerEnv differs from the one in rllab.envs.mujoco.swimmer_env in the additional initialization options
    that fix the rewards and observation space.
    The 'com' is added to the env_infos.
    Also the get_ori() method is added for the Maze and Gather tasks.
    """
    FILE = 'swimmer_hfield.xml'
    ORI_IND = 2

    @autoargs.arg('ctrl_cost_coeff',
                  type=float,
                  help='cost coefficient for controls')
    def __init__(self,
                 ctrl_cost_coeff=1e-2,
                 ego_obs=False,
                 sparse_rew=False,
                 *args,
                 **kwargs):
        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.ego_obs = ego_obs
        self.sparse_rew = sparse_rew
        super(SwimmerUnevenFloorEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    def get_current_obs(self):
        if self.ego_obs:
            return np.concatenate([
                self.model.data.qpos.flat[2:],
                self.model.data.qvel.flat,
            ]).reshape(-1)
        else:
            return np.concatenate([
                self.model.data.qpos.flat,
                self.model.data.qvel.flat,
                self.get_body_com("torso").flat,
            ]).reshape(-1)

    def get_ori(self):
        return self.model.data.qpos[self.__class__.ORI_IND]

    def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        forward_reward = np.linalg.norm(self.get_body_comvel(
            "torso"))  # swimmer has no problem of jumping reward
        reward = forward_reward - ctrl_cost
        done = False
        if self.sparse_rew:
            if abs(self.get_body_com("torso")[0]) > 100.0:
                reward = 1.0
                done = True
            else:
                reward = 0.
        com = np.concatenate([self.get_body_com("torso").flat]).reshape(-1)
        ori = self.get_ori()
        return Step(next_obs, reward, done, com=com, ori=ori)

    @overrides
    def log_diagnostics(self, paths, prefix=''):
        progs = [
            np.linalg.norm(path["env_infos"]["com"][-1] -
                           path["env_infos"]["com"][0]) for path in paths
        ]
        logger.record_tabular_misc_stat('Progress', progs)
        self.plot_visitations(paths, visit_prefix=prefix)

    @overrides
    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(init_height=1000, init_width=1000)
            self.viewer.start()
            self.viewer.set_model(self.model)
        return self.viewer
Ejemplo n.º 7
0
class MujocoEnv(Env):
    FILE = None

    @autoargs.arg('action_noise',
                  type=float,
                  help='Noise added to the controls, which will be '
                  'proportional to the action bounds')
    def __init__(
        self,
        action_noise=0.0,
        file_path=None,
        template_args=None,
        random_init_state=False,
    ):
        #Haoran: even if random_init_state
        # compile template
        if file_path is None:
            if self.__class__.FILE is None:
                raise "Mujoco file not specified"
            file_path = osp.join(MODEL_DIR, self.__class__.FILE)
        if file_path.endswith(".mako"):
            lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
            with open(file_path) as template_file:
                template = mako.template.Template(template_file.read(),
                                                  lookup=lookup)
            content = template.render(
                opts=template_args if template_args is not None else {}, )
            tmp_f, file_path = tempfile.mkstemp(text=True)
            with open(file_path, 'w') as f:
                f.write(content)
            self.model = MjModel(file_path)
            os.close(tmp_f)
            # os.remove(file_path)
            # TODO
        else:
            self.model = MjModel(file_path)
        self.data = self.model.data
        self.viewer = None
        self.init_qpos = self.model.data.qpos
        self.init_qvel = self.model.data.qvel
        self.init_qacc = self.model.data.qacc
        self.init_ctrl = self.model.data.ctrl
        self.init_damping = (self.model.dof_damping[:, 0]).copy()
        self.init_armature = (self.model.dof_armature[:, 0]).copy()
        self.init_frictionloss = (self.model.dof_frictionloss[:, 0]).copy()
        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size
        self.action_noise = action_noise
        self.random_init_state = random_init_state
        if "frame_skip" in self.model.numeric_names:
            frame_skip_id = self.model.numeric_names.index("frame_skip")
            addr = self.model.numeric_adr.flat[frame_skip_id]
            self.frame_skip = int(self.model.numeric_data.flat[addr])
        else:
            self.frame_skip = 1
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    @overrides
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    @overrides
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.bounds

    def reset_mujoco(self, init_state=None):
        # init_state = (0.387, 1.137, -2.028, -1.744, 2.029, -0.873, 1.55, 0, 0)
        if init_state is None:
            if self.random_init_state:
                self.model.data.qpos = self.init_qpos + \
                    np.random.normal(size=self.init_qpos.shape) * 0.01
                self.model.data.qvel = self.init_qvel + \
                    np.random.normal(size=self.init_qvel.shape) * 0.1
            else:
                self.model.data.qpos = self.init_qpos
                self.model.data.qvel = self.init_qvel

            self.model.data.qacc = self.init_qacc
            self.model.data.ctrl = self.init_ctrl
        else:
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.model.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start:start + datum_dim]
                if len(datum) == 0:
                    datum = getattr(self, 'init_' + datum_name)
                setattr(self.model.data, datum_name, datum)
                start += datum_dim
                self.model.forward()
        # print("inside mujoco reset: ", self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl)

    @overrides
    def reset(self, init_state=None, *args, **kwargs):
        self.reset_mujoco(init_state)
        self.model.forward()
        self.current_com = self.model.data.com_subtree[0]
        self.dcom = np.zeros_like(self.current_com)
        # print("outside mujoco reset: ", self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl)
        return self.get_current_obs()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.model.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.model.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.model.data.qpos.flat, self.model.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.model.data.qpos,
            self.model.data.qvel,
            self.model.data.qacc,
            self.model.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        self.model.data.ctrl = self.inject_action_noise(action)
        for _ in range(self.frame_skip):
            self.model.step()
        self.model.forward()
        new_com = self.model.data.com_subtree[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self, config=None):
        if self.viewer is None:
            self.viewer = MjViewer()
            self.viewer.start()
            self.viewer.set_model(self.model)
        if config is not None:
            self.viewer.set_window_pose(config["xpos"], config["ypos"])
            self.viewer.set_window_size(config["width"], config["height"])
            self.viewer.set_window_title(config["title"])
        return self.viewer

    def setup_camera(self, cam_pos=None, viewer=None):
        """Setup camera
        """
        if cam_pos is None:
            cam_pos = self._params["cam_pos"]
        if viewer is None:
            viewer = self._viewer
        viewer.cam.lookat[0] = cam_pos[0]
        viewer.cam.lookat[1] = cam_pos[1]
        viewer.cam.lookat[2] = cam_pos[2]
        viewer.cam.distance = cam_pos[3]
        viewer.cam.elevation = cam_pos[4]
        viewer.cam.azimuth = cam_pos[5]
        viewer.cam.trackbodyid = -1

    def render(self, close=False, mode='human', config=None):
        if mode == 'human':
            viewer = self.get_viewer(config=config)
            viewer.loop_once()
        elif mode == 'rgb_array':
            viewer = self.get_viewer(config=config)
            viewer.loop_once()
            # self.get_viewer(config=config).render()
            data, width, height = self.get_viewer(config=config).get_image()
            return np.fromstring(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        if close:
            self.stop_viewer()

    def start_viewer(self):
        viewer = self.get_viewer()
        if not viewer.running:
            viewer.start()

    def stop_viewer(self):
        if self.viewer:
            self.viewer.finish()
            self.viewer = None

    def release(self):
        # temporarily alleviate the issue (but still some leak)
        from rllab.mujoco_py.mjlib import mjlib
        mjlib.mj_deleteModel(self.model._wrapped)
        mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.xmat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.com_subtree[idx]

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.body_comvels[idx]

    def print_stats(self):
        super(MujocoEnv, self).print_stats()
        print("qpos dim:\t%d" % len(self.model.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError

    def set_state_tmp(self, state, restore=True):
        if restore:
            prev_pos = self.model.data.qpos
            prev_qvel = self.model.data.qvel
            prev_ctrl = self.model.data.ctrl
            prev_act = self.model.data.act
        qpos, qvel = self.decode_state(state)
        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model.forward()
        yield
        if restore:
            self.model.data.qpos = prev_pos
            self.model.data.qvel = prev_qvel
            self.model.data.ctrl = prev_ctrl
            self.model.data.act = prev_act
            self.model.forward()

    def get_param_values(self):
        return {}

    def set_param_values(self, values):
        pass
Ejemplo n.º 8
0
class PR2Env(MujocoEnv, Serializable):
    FILE = 'pr2_legofree.xml'

    def __init__(self, model='pr2.xml', *args, **kwargs):

        self.goal = None
        self.action_penalty = 0.5 * 1e-2
        if model not in [None, 0]:
            self.set_model(model)

        super(PR2Env, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    def set_model(self, model):
        self.__class__.FILE = model

    def get_current_obs(self):
        return np.concatenate([
            self.model.data.qpos.flat[:-3],
            self.model.data.qvel.
            flat[:
                 -3],  # Do not include the velocity of the target (should be 0).
            self.get_vec_tip_to_goal(),
        ]).reshape(-1)

    def get_tip_position(self):
        return self.model.data.site_xpos[0]

    def get_vec_tip_to_goal(self):
        tip_position = self.get_tip_position()
        goal_position = self.goal
        vec_tip_to_goal = goal_position - tip_position
        return vec_tip_to_goal

    def step(self, action):
        vec_tip_to_goal = self.get_vec_tip_to_goal()

        self.forward_dynamics(action)
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        reward_ctrl = -self.action_penalty * np.sum(np.square(
            action / scaling))

        distance_tip_to_goal = np.sum(np.square(vec_tip_to_goal))
        reward_tip = -distance_tip_to_goal

        reward = reward_tip + reward_ctrl  # + reward_occlusion
        done = False

        self.time_step += 1
        if self.max_path_length and self.time_step > self.max_path_length:
            done = True

        ob = self.get_current_obs()

        return ob, float(reward), done, {}

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer()
            self.viewer.start()
            self.viewer.set_model(self.model)
            self.viewer.cam.camid = 0
        return self.viewer

    @overrides
    def reset_mujoco(self, qpos=None, qvel=None):
        qpos = self.init_qpos + np.random.normal(
            size=self.init_qpos.shape) * 0.01
        self.goal = np.random.uniform([0.4, 0.25, 0.6], [0.6, 0.75, 1.])
        qpos[-3:, 0] = self.goal

        qvel = self.init_qvel + np.random.normal(
            size=self.init_qvel.shape) * 0.1
        qvel[-3:, 0] = 0

        self.model.data.qpos = qpos
        self.model.data.qvel = qvel
        self.model.data.qacc = self.init_qacc
        self.model.data.ctrl = self.init_ctrl
Ejemplo n.º 9
0
class MujocoEnv(Env):
    FILE = None

    @autoargs.arg('action_noise', type=float,
                  help='Noise added to the controls, which will be '
                       'proportional to the action bounds')
    def __init__(self, action_noise=0.0, file_path=None, template_args=None):
        # compile template
        if file_path is None:
            if self.__class__.FILE is None:
                raise "Mujoco file not specified"
            file_path = osp.join(MODEL_DIR, self.__class__.FILE)
        if file_path.endswith(".mako"):
            lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
            with open(file_path) as template_file:
                template = mako.template.Template(
                    template_file.read(), lookup=lookup)
            content = template.render(
                opts=template_args if template_args is not None else {},
            )
            tmp_f, file_path = tempfile.mkstemp(text=True)
            with open(file_path, 'w') as f:
                f.write(content)
            self.model = MjModel(file_path)
            os.close(tmp_f)
        else:
            self.model = MjModel(file_path)
        self.data = self.model.data
        self.viewer = None
        self.init_qpos = self.model.data.qpos
        self.init_qvel = self.model.data.qvel
        self.init_qacc = self.model.data.qacc
        self.init_ctrl = self.model.data.ctrl
        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size
        self.action_noise = action_noise
        if "frame_skip" in self.model.numeric_names:
            frame_skip_id = self.model.numeric_names.index("frame_skip")
            addr = self.model.numeric_adr.flat[frame_skip_id]
            self.frame_skip = int(self.model.numeric_data.flat[addr])
        else:
            self.frame_skip = 1
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    @overrides
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    @overrides
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.bounds

    def reset_mujoco(self):
        self.model.data.qpos = self.init_qpos + \
                               np.random.normal(size=self.init_qpos.shape) * 0.01
        self.model.data.qvel = self.init_qvel + \
                               np.random.normal(size=self.init_qvel.shape) * 0.1
        self.model.data.qacc = self.init_qacc
        self.model.data.ctrl = self.init_ctrl

    @overrides
    def reset(self):
        self.reset_mujoco()
        self.model.forward()
        self.current_com = self.model.data.com_subtree[0]
        self.dcom = np.zeros_like(self.current_com)
        return self.get_current_obs()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.model.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.model.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate([
            self.model.data.qpos.flat,
            self.model.data.qvel.flat
        ])

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        self.model.data.ctrl = self.inject_action_noise(action)
        for _ in range(self.frame_skip):
            self.model.step()
        self.model.forward()
        new_com = self.model.data.com_subtree[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer()
            self.viewer.start()
            self.viewer.set_model(self.model)
        return self.viewer

    def render(self):
        viewer = self.get_viewer()
        viewer.loop_once()

    def start_viewer(self):
        viewer = self.get_viewer()
        if not viewer.running:
            viewer.start()

    def stop_viewer(self):
        if self.viewer:
            self.viewer.finish()

    def release(self):
        # temporarily alleviate the issue (but still some leak)
        from rllab.mujoco_py.mjlib import mjlib
        mjlib.mj_deleteModel(self.model._wrapped)
        mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.xmat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.data.com_subtree[idx]

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.model.body_comvels[idx]

    def print_stats(self):
        super(MujocoEnv, self).print_stats()
        print "qpos dim:\t%d" % len(self.model.data.qpos)

    def action_from_key(self, key):
        raise NotImplementedError