Example #1
0
def test_ignore_mujoco_warnings():
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with ignore_mujoco_warnings():
        # This should raise an exception due to the mujoco warning callback,
        # but it's suppressed by the context manager.
        sim.step()

    sim.reset()
    with pytest.raises(Exception):
        # test to make sure previous warning callback restored.
        sim.step()
Example #2
0
def test_mj_sim_basics():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model, nsubsteps=2)

    sim.reset()
    sim.step()
    sim.reset()
    sim.forward()
Example #3
0
def test_mj_warning_raises():
    ''' Test that MuJoCo warnings cause exceptions. '''
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with pytest.raises(Exception):
        # This should raise an exception due to the mujoco warning callback
        sim.step()
Example #4
0
def test_mj_warning_raises():
    ''' Test that MuJoCo warnings cause exceptions. '''
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with pytest.raises(Exception):
        # This should raise an exception due to the mujoco warning callback
        sim.step()
Example #5
0
def test_xvelr():  # xvelr = rotational velocity in world frame
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="0.3"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/>
                    <geom name="geom2" pos="0 0 0" size="0.3"/>
                    <site name="site1" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    sim.forward()
    # Check that xvelr starts out at zero (since qvel is zero)
    site1_xvelr = sim.data.get_site_xvelr('site1')
    np.testing.assert_allclose(site1_xvelr, np.zeros(3))
    # Push the base body and step forward to get it moving
    sim.data.ctrl[0] = 1e9
    sim.step()
    sim.forward()
    # Check that the first body has nonzero xvelr
    body1_xvelr = sim.data.get_body_xvelr('body1')
    assert not np.allclose(body1_xvelr, np.zeros(3))
    # Check that the second body has zero xvelr (still)
    body2_xvelr = sim.data.get_body_xvelr('body2')
    np.testing.assert_allclose(body2_xvelr, np.zeros(3))
    # Check that this matches the batch (gathered) getter property
    np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
Example #6
0
def test_xvelr():  # xvelr = rotational velocity in world frame
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="0.3"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/>
                    <geom name="geom2" pos="0 0 0" size="0.3"/>
                    <site name="site1" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    sim.forward()
    # Check that xvelr starts out at zero (since qvel is zero)
    site1_xvelr = sim.data.get_site_xvelr('site1')
    np.testing.assert_allclose(site1_xvelr, np.zeros(3))
    # Push the base body and step forward to get it moving
    sim.data.ctrl[0] = 1e9
    sim.step()
    sim.forward()
    # Check that the first body has nonzero xvelr
    body1_xvelr = sim.data.get_body_xvelr('body1')
    assert not np.allclose(body1_xvelr, np.zeros(3))
    # Check that the second body has zero xvelr (still)
    body2_xvelr = sim.data.get_body_xvelr('body2')
    np.testing.assert_allclose(body2_xvelr, np.zeros(3))
    # Check that this matches the batch (gathered) getter property
    np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
Example #7
0
def gen_robot_configs(ref_file, robot_num, vars_to_change, param_var_num,
                      skip_geom, root_save_dir):
    pre_gen_params = pre_gen_robot_params(vars_to_change=vars_to_change,
                                          param_var_num=param_var_num)
    joint_sites = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6']
    for idx in tqdm(range(robot_num)):
        model = load_model_from_path(ref_file)
        sim = MjSim(model)
        valid_joint_sites = [
            el for el in joint_sites if el in sim.model._site_name2id
        ]
        valid_joints_num = len(valid_joint_sites)

        robot_param_random_sample(sim, vars_to_change, pre_gen_params,
                                  valid_joints_num, skip_geom)
        sim.model.vis.map.znear = 0.02
        sim.model.vis.map.zfar = 50.0
        sim.reset()
        robot_id = int(re.findall(r'\d+', ref_file.split('/')[-1])[0])
        save_dir = os.path.join(
            root_save_dir, '%d_dof_%d_%d' % (valid_joints_num, robot_id, idx))
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, 'robot.xml'), 'w') as f:
            sim.save(f, keep_inertials=True)
Example #8
0
class MujocoEnv(gym.Env):
    FILE = None

    @autoargs.arg('action_noise',
                  type=float,
                  help='Noise added to the controls, which will be '
                  'proportional to the action bounds')
    def __init__(self, action_noise=0.0, file_path=None, template_args=None):
        # compile template
        if file_path is None:
            if self.__class__.FILE is None:
                raise NotImplementedError("Mujoco file not specified")
            file_path = osp.join(MODEL_DIR, self.__class__.FILE)
        if file_path.endswith(".mako"):
            lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
            with open(file_path) as template_file:
                template = mako.template.Template(template_file.read(),
                                                  lookup=lookup)
            content = template.render(
                opts=template_args if template_args is not None else {}, )
            tmp_f, file_path = tempfile.mkstemp(suffix=".xml", text=True)
            with open(file_path, 'w') as f:
                f.write(content)
            self.model = load_model_from_path(file_path)
            os.close(tmp_f)
        else:
            self.model = load_model_from_path(file_path)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.render_width = 512
        self.render_height = 512
        self.render_camera = None
        self.init_qpos = self.sim.data.qpos
        self.init_qvel = self.sim.data.qvel
        self.init_qacc = self.sim.data.qacc
        self.init_ctrl = self.sim.data.ctrl
        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size
        self.action_noise = action_noise
        self.frame_skip = 1
        self.dcom = None
        self.current_com = None
        self.reset()
        super().__init__()

    @cached_property
    @overrides
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return gym.spaces.Box(lb, ub, dtype=np.float32)

    @cached_property
    @overrides
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return gym.spaces.Box(ub * -1, ub, dtype=np.float32)

    @property
    def action_bounds(self):
        assert isinstance(self.action_space, gym.spaces.Box)
        return self.action_space.low, self.action_space.high

    def reset_mujoco(self, init_state=None):
        self.sim.reset()
        if init_state is None:
            self.sim.data.qpos[:] = self.init_qpos + np.random.normal(
                size=self.init_qpos.shape) * 0.01
            self.sim.data.qvel[:] = self.init_qvel + np.random.normal(
                size=self.init_qvel.shape) * 0.1
            self.sim.data.qacc[:] = self.init_qacc
            self.sim.data.ctrl[:] = self.init_ctrl
        else:
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.sim.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start:start + datum_dim]
                setattr(self.sim.data, datum_name, datum)
                start += datum_dim

    @overrides
    def reset(self, init_state=None):
        self.reset_mujoco(init_state)
        self.sim.forward()
        self.current_com = self.sim.data.subtree_com[0]
        self.dcom = np.zeros_like(self.current_com)
        return self.get_current_obs()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.sim.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            data.cinert.flat,
            data.cvel.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        self.sim.data.ctrl[:] = self.inject_action_noise(action)
        for _ in range(self.frame_skip):
            self.sim.step()
        self.sim.forward()
        new_com = self.sim.data.subtree_com[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        return self.viewer

    def render(self, close=False, mode='human'):
        if mode == 'human':
            viewer = self.get_viewer()
            viewer.render()
            return None
        elif mode == 'rgb_array':
            img = self.sim.render(self.render_width,
                                  self.render_height,
                                  camera_name=self.render_camera)
            img = img[::-1, :, :]
            return img
        if close:
            self.stop_viewer()
        return None

    def start_viewer(self):
        viewer = self.get_viewer()
        if not viewer.running:
            viewer.start()

    def stop_viewer(self):
        if self.viewer:
            self.viewer.finish()

    def release(self):
        # temporarily alleviate the issue (but still some leak)
        functions.mj_deleteModel(self.sim._wrapped)
        functions.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        return self.data.get_body_xmat(body_name).reshape((3, 3))

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def get_body_comvel(self, body_name):
        return self.data.get_body_xvelp(body_name)

    def print_stats(self):
        super(MujocoEnv, self).print_stats()
        print("qpos dim:\t%d" % len(self.sim.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError
Example #9
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip=1, xml_string=""):
        """
        @param model_path path of the default model
        @param xml_string if given, the model will be reset using these values
        """

        fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.model = load_model_from_path(fullpath)
        with open(fullpath, 'r') as f:
            self.model_xml = f.read()
            self.default_xml = self.model_xml

        if xml_string != "":
            self.model = load_model_from_xml(xml_string)
            self.model_xml = xml_string

        self.frame_skip = frame_skip

        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
        self.set_param_space()

    def get_params(self):
        """
        Returns a dict of (param_name, param_value)
        """
        return MujocoUpdater(self.model_xml).get_params()

    def set_params(self, params):
        """
        @param params: dict(param_name, param_value)
        param_name should be a string of bodyname__type__paramname
        where type is either geom or joint,
        e.g. thigh__joint__friction,
        and param_value is a numpy array
        """
        # invalidate cached properties
        self.__dict__.pop('action_space', None)
        self.__dict__.pop('observation_space', None)

        new_xml = MujocoUpdater.set_params(self.model_xml, params)
        self.__init__(xml_string=new_xml)
        self.reset()
        return self

    def set_param_space(self, param_space=None, eps_scale=0.5, replace=True):
        """
        Set param_space
        @param param_space: dict(string, gym.space.base.Space)
        @param eps_scale: scale of variation applied to all params
        @param replace: if true, param_space overwrites default param_space.
                        Default behavior is to merge.
        """
        if param_space is not None:
            if replace:
                self._param_space = param_space
            else:
                self._param_space = {**self._param_space, **param_space}
        else:
            params = MujocoUpdater(self.model_xml).get_params()
            self._param_space = dict()
            for param, value in params.items():
                eps = np.abs(value) * eps_scale
                ub = value + eps
                lb = value - eps
                for name in positive_params:
                    if name in param:
                        lb = np.clip(lb, 1e-3, ub)
                        break
                space = spaces.Box(lb, ub)
                self._param_space[param] = space

    def get_geom_params(self, body_name):
        geom = MujocoUpdater(self.model_xml).get_geom(body_name)
        return {
            k: v
            for k, v in geom.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_joint_params(self, body_name):
        joint = MujocoUpdater(self.model_xml).get_joint(body_name)
        return {
            k: v
            for k, v in joint.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_body_names(self):
        return MujocoUpdater(self.model_xml).get_body_names()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def reset(self, randomize=True):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    # Added for bayesian_rl
    def get_sim_state(self):
        return self.sim.get_state()

    # Added for bayesian_rl
    def set_sim_state(self, state):
        self.sim.set_state(state)

    # Added for bayesian_rl
    def set_state_vector(self, state_vector):
        qpos = state_vector[:self.model.nq]
        qvel = state_vector[self.model.nq:]
        self.set_state(qpos, qvel)

    # Added for bayesian_rl
    def get_state_vector(self):
        return self.state_vector()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    # def step(self, a):
    #     return self._step(a)

    # Added for bayesian_rl
    def take_action(self, a):
        self.step(a)
        return self.get_sim_state()

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['mean']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=640,
                                             height=480,
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + '_' + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))

    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()
Example #10
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Example #11
0
    viewer.render()


visualize = False
# ---------------- Model to select ----------------
# model = load_model_from_path("position_control/env_position_control.xml")
# model = load_model_from_path("P_control/env_P_control.xml")
model = load_model_from_path("PID_control/env_PID_control.xml")
# ---------------- Program ----------------
sim = MjSim(model)
cymj.set_pid_control(sim.model, sim.data)
if visualize:
    viewer = mj_viewer_setup(sim)
dt = sim.model.opt.timestep

sim.reset()
sim.data.ctrl[:] = 0
points = []
reference = []
for t in range(800):  # 200 is maximal episode steps count
    if t > 50:
        sim.data.ctrl[9] = 0.3
    if t > 250:
        sim.data.ctrl[9] = 0.6
    if t > 500:
        sim.data.ctrl[9] = 0.3
    sim.step()
    points.append(sim.data.qpos[9])
    reference.append(sim.data.ctrl[9])
    if visualize:
        mj_render(viewer)
Example #12
0
class PegInsertionEnv(BaseEnv):
    def __init__(self,
                 robot_folders,
                 robot_dir,
                 substeps,
                 tol=0.02,
                 train=True,
                 with_kin=None,
                 with_dyn=None,
                 multi_goal=False,
                 ):
        super().__init__(robot_folders=robot_folders,
                         robot_dir=robot_dir,
                         substeps=substeps,
                         tol=tol,
                         train=train,
                         with_kin=with_kin,
                         with_dyn=with_dyn,
                         multi_goal=multi_goal)

    def reset(self, robot_id=None):
        if robot_id is None:
            self.robot_id = np.random.randint(0, self.train_robot_num, 1)[0]
        else:
            self.robot_id = robot_id
        self.reset_robot(self.robot_id)

        ob = self.get_obs()
        self.ep_reward = 0
        self.ep_len = 0
        return ob

    def step(self, action):
        scaled_action = self.scale_action(action[:self.act_dim])
        self.sim.data.ctrl[:self.act_dim] = scaled_action
        self.sim.step()
        ob = self.get_obs()
        peg_target = self.sim.data.get_site_xpos('target'),
        reward, dist, done = self.cal_reward(ob[1].copy(),
                                             peg_target,
                                             action)

        self.ep_reward += reward
        self.ep_len += 1
        info = {'reward_so_far': self.ep_reward, 'steps_so_far': self.ep_len,
                'reward': reward, 'step': 1, 'dist': dist}
        return ob, reward, done, info

    def gen_random_delta_xyz(self):
        delta_xyz_range = np.array([[-0.20, 0.20],
                                    [-0.20, 0.20],
                                    [-0.20, 0.20]])

        starts = delta_xyz_range[:, 0]
        widths = delta_xyz_range[:, 1] - delta_xyz_range[:, 0]
        ran_num = np.random.random(size=(delta_xyz_range.shape[0]))
        delta_xyz = starts + widths * ran_num
        return delta_xyz

    def reset_robot(self, robot_id):
        self.robot_folder_id = self.dir2id[self.robots[robot_id]]
        robot_file = os.path.join(self.robots[robot_id], 'robot.xml')
        self.model = load_model_from_path(robot_file)
        self.sim = MjSim(self.model, nsubsteps=self.nsubsteps)
        self.update_action_space()
        if self.multi_goal:
            table_name = 'table'
            table_id = self.sim.model._body_name2id[table_name]
            self.sim.model.body_pos[table_id] += self.gen_random_delta_xyz()
        self.sim.reset()
        self.sim.step()
        if self.viewer is not None:
            self.viewer = MjViewer(self.sim)
Example #13
0
class BlockWordEnv:
    def __init__(self,
                 env_file,
                 random_color=None,
                 random_num=5,
                 debug=False,
                 random_seed=0):
        self.env_file = env_file
        self.random_color = random_color
        self.random_num = random_num
        self.debug = debug
        self.model = load_model_from_path(env_file)
        self.sim = MjSim(self.model, nsubsteps=1)
        self.sim.model.vis.map.znear = 0.02
        self.sim.model.vis.map.zfar = 50.0
        self.cube_size = self.sim.model.geom_size[
            self.model._geom_name2id['cube_0']]
        self.cuboid_size = self.sim.model.geom_size[
            self.model._geom_name2id['cuboid_0']]
        self.cube_num = len(
            [i for i in self.sim.model.geom_names if 'cube_' in i])
        self.cuboid_num = len(
            [i for i in self.sim.model.geom_names if 'cuboid_' in i])
        if self.debug:
            print('total cube num:', self.cube_num)
            print('total cuboid num:', self.cuboid_num)
        self.max_num_per_type = max(self.cube_num, self.cuboid_num)
        self.center_bounds = [
            -0.25, 0.25
        ]  # [0, self.cuboid_size[0] * self.max_num_per_type]
        self.pos_candidates = np.arange(
            self.center_bounds[0], self.center_bounds[1] + self.cube_size[0],
            self.cube_size[0])
        self.modder = TextureModder(self.sim, random_state=random_seed)
        self.cur_id = {'cube': 0, 'cuboid': 0}
        self.viewer = None
        self.active_blocks = []
        if random_color:
            self.reset_viewer()

    def reset(self):
        self.sim.reset()
        self.sim.step()
        self.active_blocks = []
        self.cur_id = {'cube': 0, 'cuboid': 0}
        if self.random_color:
            self.randomize_color()
        if self.viewer is not None:
            self.reset_viewer()

    def randomize_color(self):
        self.modder.whiten_materials()
        for name in self.sim.model.geom_names:
            if 'table' in name:
                continue
            self.modder.rand_all(name)

    def simulate_one_epoch(self):
        self.gen_constrained_ran_bk_configs()
        imgs = []
        for i in range(self.random_num):
            img = self.get_img()
            imgs.append(img.copy())
            self.randomize_color()
        stable = self.check_stability(render=False)
        return imgs, stable

    def step(self):
        self.sim.step()

    def forward(self):
        self.sim.forward()

    def get_active_bk_states(self):
        bk_poses = []
        for bk_name in self.active_blocks:
            pose = self.sim.data.get_joint_qpos(bk_name)
            bk_poses.append(pose)
        return np.array(bk_poses)

    def check_stability(self, render=False):
        self.sim.step()
        prev_poses = self.get_active_bk_states()
        for i in range(400):
            self.sim.step()
            if render:
                self.render()
        post_poses = self.get_active_bk_states()
        diff = np.abs(post_poses - prev_poses)
        diff_norm = np.linalg.norm(diff[:, :3], axis=1)
        if self.debug:
            print('prev:')
            print(prev_poses[:, :3])
            print('post')
            print(post_poses[:, :3])
            print('diff norm:', diff_norm)
        stable = np.all(diff_norm < 0.01)
        if self.debug:
            print('Current configuration stable:', stable)
        return stable

    def render(self):
        if self.viewer is None:
            self.reset_viewer()
        self.viewer.render()

    def reset_viewer(self):
        self.viewer = MjViewer(self.sim)
        self.viewer.cam.lookat[:3] = np.array([0, 0, 0.1])
        self.viewer.cam.distance = 2
        self.viewer.cam.elevation = -20

    def move_given_block(self, name, target_pos):
        prev_pose = self.sim.data.get_joint_qpos(name)
        post_pose = prev_pose.copy()
        post_pose[:3] = target_pos
        self.sim.data.set_joint_qpos(name, post_pose)
        self.active_blocks.append(name)
        if self.debug:
            print('{0:s} pose after moving:'.format(name), post_pose)

    def move_given_blocks(self, block_dict):
        for bk_name, pos in block_dict.items():
            self.move_given_block(bk_name, pos)

    def move_block_for_demo(self, name, target_pos):
        prev_pose = self.sim.data.get_joint_qpos(name)
        if self.debug:
            print('{0:s} pose before moving:'.format(name), prev_pose)
        post_pose = prev_pose.copy()
        planned_path = []
        up_steps = 20
        h_steps = 30
        down_steps = 20
        planned_path.append(prev_pose.copy())
        for i in range(up_steps):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[2] += (0.45 - prev_pose[2]) / float(up_steps)
            planned_path.append(tmp_pose.copy())
        for i in range(h_steps + 1):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[0] = (target_pos[0] -
                           prev_pose[0]) * i / h_steps + prev_pose[0]
            tmp_pose[1] = (target_pos[1] -
                           prev_pose[1]) * i / h_steps + prev_pose[1]
            planned_path.append(tmp_pose.copy())
        for i in range(down_steps):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[2] -= (0.45 - target_pos[2]) / float(down_steps)
            planned_path.append(tmp_pose.copy())
        post_pose[:3] = target_pos
        planned_path.append(post_pose.copy())
        for pos in planned_path:
            self.sim.data.set_joint_qpos(name, pos)
            time.sleep(0.02)
            self.sim.forward()
            self.render()
        self.active_blocks.append(name)
        if self.debug:
            print('{0:s} pose after moving:'.format(name), post_pose)

    def move_blocks_for_demo(self, block_dict):
        name = list(block_dict.keys())[0]
        target_pos = block_dict[name]
        initial_poses = {}
        for key in block_dict.keys():
            initial_poses[key] = self.sim.data.get_joint_qpos(key)
        prev_pose = self.sim.data.get_joint_qpos(name)
        if self.debug:
            print('{0:s} pose before moving:'.format(name), prev_pose)
        post_pose = prev_pose.copy()
        planned_delta_path = []
        planned_path = []
        up_steps = 20
        h_steps = 30
        down_steps = 20
        planned_path.append(prev_pose.copy())
        planned_delta_path.append(np.zeros_like(prev_pose))
        for i in range(up_steps):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[2] += (0.45 - prev_pose[2]) / float(up_steps)
            planned_delta_path.append(tmp_pose - planned_path[-1])
            planned_path.append(tmp_pose.copy())
        for i in range(h_steps + 1):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[0] = (target_pos[0] -
                           prev_pose[0]) * i / h_steps + prev_pose[0]
            tmp_pose[1] = (target_pos[1] -
                           prev_pose[1]) * i / h_steps + prev_pose[1]
            planned_delta_path.append(tmp_pose - planned_path[-1])
            planned_path.append(tmp_pose.copy())
        for i in range(down_steps):
            tmp_pose = planned_path[-1].copy()
            tmp_pose[2] -= (0.45 - target_pos[2]) / float(down_steps)
            planned_delta_path.append(tmp_pose - planned_path[-1])
            planned_path.append(tmp_pose.copy())
        post_pose[:3] = target_pos
        planned_delta_path.append(post_pose - planned_path[-1])
        planned_path.append(post_pose.copy())

        for delta_pos in planned_delta_path:
            for bk_name, target_bk_pos in block_dict.items():
                initial_poses[bk_name] = initial_poses[bk_name] + delta_pos
                self.sim.data.set_joint_qpos(bk_name, initial_poses[bk_name])
            time.sleep(0.02)
            self.sim.forward()
            self.render()
        self.active_blocks.append(name)
        if self.debug:
            print('{0:s} pose after moving:'.format(name),
                  self.sim.data.get_joint_qpos(name))

    def move_block(self, target_pos, bk_type='cube'):
        # center bounds: [0, 0.1 * 30]
        assert bk_type == 'cube' or bk_type == 'cuboid'
        bk_name = '{0:s}_{1:d}'.format(bk_type, self.cur_id[bk_type])
        prev_pose = self.sim.data.get_joint_qpos(bk_name)
        post_pose = prev_pose.copy()
        post_pose[:3] = target_pos

        self.sim.data.set_joint_qpos(bk_name, post_pose)
        self.active_blocks.append(bk_name)
        if self.debug:
            print(
                '{0:s}_{1:d} pose after moving:'.format(
                    bk_type, self.cur_id[bk_type]), post_pose)
        self.cur_id[bk_type] += 1

    def get_img(self):
        # return self.get_img_demo()
        img = self.sim.render(camera_name='camera',
                              width=600,
                              height=600,
                              depth=False)
        img = np.flipud(img)
        resized_img = cv2.resize(img[0:500, 50:550], (224, 224),
                                 cv2.INTER_AREA)
        return resized_img

    def get_img_demo(self):
        img = self.sim.render(camera_name='camera',
                              width=1200,
                              height=700,
                              depth=False)
        img = np.flipud(img)
        img = img[:, :, ::-1]
        return img

    def build_tower(self):
        layer_num = 1
        for i in range(20):
            z = (2 * layer_num - 1) * self.cube_size[2]
            y = 0
            x = 0
            target_pos = np.array([x, y, z])
            self.move_block(target_pos, bk_type='cube')
            layer_num += 1

    def gen_ran_bk_configs(self, render=False):
        while True:
            cuboid_num = np.random.choice(5, 1)[0]
            cube_num = np.random.choice(15, 1)[0]
            if cuboid_num > 0 or cube_num > 0:
                break
        total_num = cuboid_num + cube_num
        blocks = [0] * cube_num + [1] * cuboid_num
        permuted_blocks = np.random.permutation(blocks)
        cur_x = self.center_bounds[0]
        layer_num = 1
        for i in range(total_num):
            bk = permuted_blocks[i]
            bk_type = 'cube' if bk == 0 else 'cuboid'
            bk_size = self.cube_size if bk == 0 else self.cuboid_size
            z = (2 * layer_num - 1) * self.cube_size[2]
            y = 0
            if self.center_bounds[1] - cur_x < bk_size[0]:
                layer_num += 1
                cur_x = self.center_bounds[0]
                continue
            else:
                bk_lower_limit = cur_x + bk_size[0]
                pos_candidates = self.pos_candidates[
                    self.pos_candidates >= bk_lower_limit]
                x = np.random.choice(pos_candidates, 1)[0]
                cur_x = x + bk_size[0]
                target_pos = np.array([x, y, z])
                self.move_block(target_pos, bk_type=bk_type)
        self.sim.forward()
        if render:
            self.render()

    def gen_constrained_ran_bk_configs(self, render=False):
        # prob = np.exp(-0.1 * np.arange(30))
        while True:
            cuboid_num = np.random.choice(5, 1)[0]
            cube_num = np.random.choice(15, 1)[0]
            if cuboid_num > 0 or cube_num > 0:
                break
        if self.debug:
            print('Selected cube num:', cube_num)
            print('Selected cuboid num:', cuboid_num)
        total_num = cuboid_num + cube_num
        blocks = [0] * cube_num + [1] * cuboid_num
        permuted_blocks = np.random.permutation(blocks)
        cur_x = self.center_bounds[0]
        layer_num = 1
        layer_pos_candidates = self.pos_candidates.copy()
        filled_segs = []
        for i in range(total_num):
            bk = permuted_blocks[i]
            bk_type = 'cube' if bk == 0 else 'cuboid'
            bk_size = self.cube_size if bk == 0 else self.cuboid_size
            z = (2 * layer_num - 1) * self.cube_size[2]
            y = 0
            bk_lower_limit = cur_x + bk_size[0]
            pos_candidates = layer_pos_candidates[
                layer_pos_candidates >= bk_lower_limit]
            if pos_candidates.size < 1:
                layer_num += 1
                cur_x = self.center_bounds[0]
                layer_pos_candidates = self.pos_candidates.copy()
                good_ids = np.zeros_like(layer_pos_candidates, dtype=bool)
                for seg in filled_segs:
                    good_ids = np.logical_or(
                        good_ids,
                        np.logical_and(layer_pos_candidates >= seg[0],
                                       layer_pos_candidates <= seg[1]))
                layer_pos_candidates = layer_pos_candidates[good_ids]
                if self.debug:
                    print('Layer [{0:d}] pos candidates num: {1:d}'.format(
                        layer_num, layer_pos_candidates.size))
                if layer_pos_candidates.size < 1:
                    break
                filled_segs = []
                continue
            else:
                x = np.random.choice(pos_candidates, 1)[0]
                cur_x = x + bk_size[0]
                target_pos = np.array([x, y, z])
                self.move_block(target_pos, bk_type=bk_type)
                filled_segs.append([x - bk_size[0], cur_x])
        self.sim.forward()
        if render:
            self.render()
Example #14
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high, dtype=np.float32)

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def get_env_state(self):
        """
        Get full state of the environment beyond qpos and qvel
        For example, if targets are defined using sites, this function should also
        contain location of the sites (which are not included in qpos).
        Must return a dictionary that can be used in the set_env_state function
        """
        raise NotImplementedError

    def set_env_state(self, state):
        """
        Uses the state dictionary to set the state of the world
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized.
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            self.viewer._hide_overlay = True
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def render(self,
               mode='human',
               width=DEFAULT_SIZE,
               height=DEFAULT_SIZE,
               camera_id=None,
               camera_name=None):
        if mode == 'rgb_array':
            if camera_id is not None and camera_name is not None:
                raise ValueError("Both `camera_id` and `camera_name` cannot be"
                                 " specified at the same time.")

            no_camera_specified = camera_name is None and camera_id is None
            if no_camera_specified:
                camera_name = 'track'

            if camera_id is None and camera_name in self.model._camera_name2id:
                camera_id = self.model.camera_name2id(camera_name)

            self._get_viewer(mode).render(width, height, camera_id=camera_id)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
class my_env(parameterized.TestCase):
    def __init__(self):
        # super(lab_env, self).__init__(env)
        # 导入xml文档
        self.model = load_model_from_path("assets/simpleEE_4box.xml")
        # 调用MjSim构建一个basic simulation
        self.sim = MjSim(model=self.model)
        self.sim = MjSim(self.model)

        self.viewer = MjViewer(self.sim)
        self.viewer._run_speed = 0.001
        self.timestep = 0
        # Sawyer Peg
        #self.init_qpos = np.array([-0.305, -0.83, 0.06086, 1.70464, -0.02976, 0.62496, -0.04712])
        # Flexiv Peg
        self.init_qpos = np.array([-0.22, -0.43, 0.449, -2, -0.25, 0.799, 0.99])

        for i in range(len(self.sim.data.qpos)):
            self.sim.data.qpos[i] = self.init_qpos[i]
        self.testQposFromSitePose(
            (np.array([0.57, 0.075, 0.08]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
            _INPLACE, True)

        print(self.sim.data.ctrl)
        print(self.sim.data.qpos)

    def get_state(self):
        self.sim.get_state()
        # 如果定义了相机
        # self.sim.data.get_camera_xpos('[camera name]')

    def reset(self):
        self.sim.reset()
        self.timestep = 0

    def step(self):
        # self.testQposFromSitePose((np.array([0.605, 0.075, 0.03]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])), _INPLACE)
        x=random.uniform(0.415, 0.635)
        y=random.uniform(-0.105, 0.105)

        self.testQposFromSitePose(
            (np.array([x, y, 0.045]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
            _INPLACE)
        # self.testQposFromSitePose(
        #     (None, np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
        #     _INPLACE, True)
        self.sim.step()
        # self.sim.data.ctrl[0] += 0.01
        # print(self.sim.data.ctrl)
        # pdb.set_trace()
        # print(self.sim.data.qpos)
        print("sensordata", self.sim.data.sensordata)
        # self.viewer.add_overlay(const.GRID_TOPRIGHT, " ", SESSION_NAME)
        self.viewer.render()
        self.timestep += 1

    def create_viewer(self, run_speed=0.0005):
        self.viewer = MjViewer(self.sim)
        self.viewer._run_speed = run_speed
        # self.viewer._hide_overlay = HIDE_OVERLAY
        # self.viewer.vopt.frame = DISPLAY_FRAME
        # self.viewer.cam.azimuth = CAM_AZIMUTH
        # self.viewer.cam.distance = CAM_DISTANCE
        # self.viewer.cam.elevation = CAM_ELEVATION

    def testQposFromSitePose(self, target, inplace, qpos_flag=False):

        physics = mujoco.Physics.from_xml_string(FlexivPeg_XML)
        target_pos, target_quat = target
        count = 0
        physics2 = physics.copy(share_model=True)
        resetter = _ResetArm(seed=0)
        while True:
          result = ik.qpos_from_site_pose(
              physics=physics2,
              site_name=_SITE_NAME,
              target_pos=target_pos,
              target_quat=target_quat,
              joint_names=_JOINTS,
              tol=_TOL,
              max_steps=_MAX_STEPS,
              inplace=inplace,
          )

          if result.success:
            break
          elif count < _MAX_RESETS:
            resetter(physics2)
            count += 1
          else:
            raise RuntimeError(
                'Failed to find a solution within %i attempts.' % _MAX_RESETS)

        self.assertLessEqual(result.steps, _MAX_STEPS)
        self.assertLessEqual(result.err_norm, _TOL)
        # pdb.set_trace()
        physics.data.qpos[:] = result.qpos
        for i in range(len(self.sim.data.qpos)):
            if qpos_flag:
                self.sim.data.qpos[i]=physics.data.qpos[i]
            else:
                self.sim.data.ctrl[i] = physics.data.qpos[i]
        # print(physics.data.qpos)
        mjlib.mj_fwdPosition(physics.model.ptr, physics.data.ptr)
        if target_pos is not None:
          pos = physics.named.data.site_xpos[_SITE_NAME]
          np.testing.assert_array_almost_equal(pos, target_pos)
        if target_quat is not None:
          xmat = physics.named.data.site_xmat[_SITE_NAME]
          quat = np.empty_like(target_quat)
          mjlib.mju_mat2Quat(quat, xmat)
          quat /= quat.ptp()  # Normalize xquat so that its max-min range is 1
class FrankaPickNPlace(object):
    def __init__(self, frame_skip=10, viewer=True):

        self.frame_skip = frame_skip

        model_path = './assets/franka_throw.xml'

        self.model = load_model_from_path(model_path)
        self.sim = MjSim(self.model, nsubsteps=frame_skip)
        self.data = self.sim.data
        if viewer:
            self.viewer = MjViewer(self.sim)
        self.act_mid = np.mean(self.model.actuator_ctrlrange, axis=1)
        self.act_rng = 0.5 * (self.model.actuator_ctrlrange[:, 1] -
                              self.model.actuator_ctrlrange[:, 0])

        nu = len(self.act_rng)
        self.action_space = Box(low=-1.,
                                high=1.,
                                shape=(nu, ),
                                dtype=np.float32)

        ob = self.get_obs(self.data)
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(len(ob), ),
                                     dtype=np.float32)

        self.obj_bid = self.model.body_name2id('object')
        self.grasp_sid = self.model.site_name2id('grasp')
        self.targ_sid = self.model.site_name2id('target')

        ## -- From the franka examples : 0, -M_PI_4, 0, -3 * M_PI_4, 0, M_PI_2, M_PI_4

        self.init_pose = np.array(
            [0., -np.pi / 4, 0., -3. * np.pi / 4, 0., np.pi / 2, np.pi / 4])
        init_pose = np.concatenate([self.init_pose, [0]])
        self.def_action = (init_pose - self.act_mid) / self.act_rng * 0

    def reset(self):

        self.sim.reset()
        self.sim.data.qpos[:7] = self.init_pose[:]
        # self.sim.data.qpos[0] = -0.3
        # self.sim.data.qpos[1] = -1.
        # self.sim.data.qpos[3] = -1.7
        # self.sim.data.qpos[5] = 1.
        self.sim.forward()
        ob = self.get_obs(self.data)
        return ob

    def get_obs(self, data):
        return np.concatenate([data.qpos.ravel(), data.qvel.ravel()])

    def get_rew(self, data):
        grasp_pos = data.site_xpos[self.grasp_sid].ravel()
        grasp_config = data.site_xmat[self.grasp_sid].reshape(3, 3)

        obj_pos = data.body_xpos[self.obj_bid].ravel()
        obj_config = mat2quat(data.body_xmat[self.obj_bid].reshape(3, 3))
        target_pos = data.site_xpos[self.targ_sid].ravel()
        target_config = mat2quat(data.site_xmat[self.targ_sid].reshape(3, 3))

        grasp_err = np.linalg.norm(grasp_pos - obj_pos)

        target_err = np.linalg.norm(obj_pos - target_pos)

        config_err = np.linalg.norm(target_config - obj_config)

        return np.log(
            grasp_err + 0.005
        ) * 0. + grasp_err + 10 * config_err + 2. * target_err  # + np.log(target_err + 0.005)

    # def get_rew(self, data):
    #     grasp_pos       = data.site_xpos[self.grasp_sid].ravel()
    #     grasp_config    = data.site_xmat[self.grasp_sid].reshape(3, 3)
    #
    #     obj_pos      = data.body_xpos[self.obj_bid].ravel()
    #     obj_config   = mat2quat(data.body_xmat[self.obj_bid].reshape(3,3))
    #     target_pos   = data.site_xpos[self.targ_sid].ravel()
    #     target_config = mat2quat(data.site_xmat[self.targ_sid].reshape(3,3))
    #
    #     grasp_err       = np.linalg.norm(grasp_pos - np.array([0.5,0.,0.5]))
    #
    #
    #     return np.log(grasp_err + 0.005) + grasp_err

    def step(self, a):

        ctrl = np.clip(a, -1.0, 1.0)
        ctrl = self.act_mid + ctrl * self.act_rng

        # ctrl[:7] = ctrl[:7]*0 + self.init_pose
        ctrl[-1] = 0
        self.data.ctrl[:] = ctrl

        self.sim.step()

        ob = self.get_obs(self.data)
        rew = self.get_rew(self.data)
        done = False
        return ob, rew, done, {}

    def render(self):
        self.viewer.render()

    def get_state(self):
        return self.sim.get_state()
Example #17
0
class Dribble_Env(object):
    def __init__(self):
        self.model = load_model_from_path("./xml/world3.xml")
        self.sim = MjSim(self.model)
        # self.viewer = MyMjViewer(self.sim)
        self.viewer = MyMjViewer(self.sim)
        self.max_vel = [-1000, 1000]
        self.x_motor = 0
        self.y_motor = 0
        self.date_time = datetime.datetime.now()
        self.path = "./datas/path_date" + str(
            self.date_time.strftime("_%Y%m%d_%H%M%S"))
        os.mkdir(self.path)

    def step(self, action):
        self.x_motor = ((action % 3) - 1) * 200
        self.y_motor = ((action // 3) - 1) * 200
        self.sim.data.ctrl[0] = self.x_motor
        self.sim.data.ctrl[1] = self.y_motor
        self.sim.step()

    def get_state(self):
        robot_x, robot_y = self.sim.data.body_xpos[1][0:2]
        robot_xv, robot_yv = self.sim.data.qvel[0:2]
        ball_x, ball_y = self.sim.data.body_xpos[2][0:2]
        ball_xv, ball_yv = self.sim.data.qvel[2:4]
        ball_pos_local = -(robot_x - ball_x), -(robot_y - ball_y)

        return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \
                robot_xv, robot_yv, ball_x, ball_y,ball_xv,ball_yv]

    def check_done(self):
        ball_x, ball_y = self.get_state()[6:8]
        if ball_x > 80 and -25 < ball_y < 25:
            return True
        else:
            return False

    def check_wall(self):
        ball_x, ball_y = self.get_state()[6:8]
        if math.fabs(ball_y) > 51:
            return True
        else:
            return False

    def reset(self):
        self.x_motor = 0
        self.y_motor = 0
        self.robot_x_data = []
        self.robot_y_data = []
        self.ball_x_data = []
        self.ball_y_data = []
        self.sim.reset()
        # self.sim.data.qpos[0] = np.random.randint(-3,3)
        self.sim.data.qpos[1] = np.random.randint(-5, 5)

    def render(self):
        self.viewer.render()

    def plot_data(self, step, t, done, episode, flag, reward):
        self.field_x = [-90, -90, 90, 90, -90]
        self.field_y = [-60, 60, 60, -60, -60]
        self.robot_x_data.append(self.sim.data.body_xpos[1][0])
        self.robot_y_data.append(self.sim.data.body_xpos[1][1])
        self.ball_x_data.append(self.sim.data.body_xpos[2][0])
        self.ball_y_data.append(self.sim.data.body_xpos[2][1])

        datas = str(self.robot_x_data[-1]) + " " + str(
            self.robot_y_data[-1]) + " " + str(
                self.ball_x_data[-1]) + " " + str(
                    self.ball_y_data[-1]) + " " + str(reward)
        with open(
                self.path + '/plotdata_' + str(episode + 1).zfill(4) + '.txt',
                'a') as f:
            f.write(str(datas) + '\n')

        if (t >= step - 1 or done) and flag:
            fig1 = plt.figure()
            plt.ion()
            plt.show()
            plt.plot(self.ball_x_data,
                     self.ball_y_data,
                     marker='o',
                     markersize=2,
                     color="red",
                     label="ball")
            plt.plot(self.robot_x_data,
                     self.robot_y_data,
                     marker="o",
                     markersize=2,
                     color='blue',
                     label="robot")
            plt.plot(self.field_x, self.field_y, markersize=1, color="black")
            plt.plot(80, 0, marker="X", color="green", label="goal")
            plt.legend(loc="lower right")
            # plt.axes().set_aspect('equal')
            plt.draw()
            plt.pause(0.001)
            plt.close(1)
class MujocoEnv(metaclass=EnvMeta):
    """
    Initializes a Mujoco Environment.

    Args:
        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering.

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes
            in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes
            in camera. False otherwise.

        render_gpu_device_id (int): corresponds to the GPU device id to use for offscreen rendering.
            Defaults to -1, in which case the device will be inferred from environment variables
            (GPUS or CUDA_VISIBLE_DEVICES).

        control_freq (float): how many control signals to receive
            in every simulated second. This sets the amount of simulation time
            that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

    Raises:
        ValueError: [Invalid renderer selection]
    """

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        render_gpu_device_id=-1,
        control_freq=20,
        horizon=1000,
        ignore_done=False,
        hard_reset=True
    ):
        # First, verify that both the on- and off-screen renderers are not being used simultaneously
        if has_renderer is True and has_offscreen_renderer is True:
            raise ValueError("the onscreen and offscreen renderers cannot be used simultaneously.")

        # Rendering-specific attributes
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_camera = render_camera
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.render_gpu_device_id = render_gpu_device_id
        self.viewer = None

        # Simulation-specific attributes
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.hard_reset = hard_reset
        self._model_postprocessor = None            # Function to post-process model after load_model() call
        self.model = None
        self.cur_time = None
        self.model_timestep = None
        self.control_timestep = None
        self.deterministic_reset = False            # Whether to add randomized resetting of objects / robot joints

        # Load the model
        self._load_model()

        # Post-process model
        self._postprocess_model()

        # Initialize the simulation
        self._initialize_sim()

        # Run all further internal (re-)initialization required
        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.

        Args:
            control_freq (float): Hz rate to run control loop at within the simulation
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq)
            )
        self.control_timestep = 1. / control_freq

    def set_model_postprocessor(self, postprocessor):
        """
        Sets the post-processor function that self.model will be passed to after load_model() is called during resets.

        Args:
            postprocessor (None or function): If set, postprocessing method should take in a Task-based instance and
                return no arguments.
        """
        self._model_postprocessor = postprocessor

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _postprocess_model(self):
        """
        Post-processes model after load_model() call. Useful for external objects (e.g.: wrappers) to
        be able to modify the sim model before it is actually loaded into the simulation
        """
        if self._model_postprocessor is not None:
            self._model_postprocessor(self.model)

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def _initialize_sim(self, xml_string=None):
        """
        Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created
        from the specified xml_string. Else, it will pull from self.model to instantiate the simulation

        Args:
            xml_string (str): If specified, creates MjSim object from this filepath
        """
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = load_model_from_xml(xml_string) if xml_string else self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.forward()

        # Setup sim time based on control frequency
        self.initialize_time(self.control_freq)

    def reset(self):
        """
        Resets simulation.

        Returns:
            OrderedDict: Environment observation space after reset occurs
        """
        # TODO(yukez): investigate black screen of death
        # Use hard reset if requested
        if self.hard_reset and not self.deterministic_reset:
            self._destroy_viewer()
            self._load_model()
            self._postprocess_model()
            self._initialize_sim()
        # Else, we only reset the sim internally
        else:
            self.sim.reset()
        # Reset necessary robosuite-centric variables
        self._reset_internal()
        self.sim.forward()
        # Make sure that all sites are toggled OFF by default
        self.visualize(vis_settings={vis: False for vis in self._visualizations})
        # Return new observations
        return self._get_observation()

    def _reset_internal(self):
        """Resets simulation internal configurations."""

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.viewer.viewer.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

            # make sure mujoco-py doesn't block rendering frames
            # (see https://github.com/StanfordVL/robosuite/issues/39)
            self.viewer.viewer._render_every_frame = True

            # Set the camera angle for viewing
            if self.render_camera is not None:
                self.viewer.set_camera(camera_id=self.sim.model.camera_name2id(self.render_camera))

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim, device_id=self.render_gpu_device_id)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """
        Grabs observations from the environment.

        Returns:
            OrderedDict: OrderedDict containing observations [(name_string, np.array), ...]

        """
        return OrderedDict()

    def step(self, action):
        """
        Takes a step in simulation with control command @action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            4-tuple:

                - (OrderedDict) observations from the environment
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) misc information

        Raises:
            ValueError: [Steps past episode termination]

        """
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1

        # Since the env.step frequency is slower than the mjsim timestep frequency, the internal controller will output
        # multiple torque commands in between new high level action commands. Therefore, we need to denote via
        # 'policy_step' whether the current step we're taking is simply an internal update of the controller,
        # or an actual policy update
        policy_step = True

        # Loop through the simulation at the model timestep rate until we're ready to take the next policy step
        # (as defined by the control frequency specified at the environment level)
        for i in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self._pre_action(action, policy_step)
            self.sim.step()
            policy_step = False

        # Note: this is done all at once to avoid floating point inaccuracies
        self.cur_time += self.control_timestep

        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action, policy_step=False):
        """
        Do any preprocessing before taking an action.

        Args:
            action (np.array): Action to execute within the environment
            policy_step (bool): Whether this current loop is an actual policy step or internal sim update step
        """
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """
        Do any housekeeping after taking an action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            3-tuple:

                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method

        """
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done

        return reward, self.done, {}

    def reward(self, action):
        """
        Reward should be a function of state and action

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            float: Reward from environment
        """
        raise NotImplementedError

    def render(self):
        """
        Renders to an on-screen window.
        """
        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.

        Returns:
            OrderedDict: Observations from the environment
        """
        observation = self._get_observation()
        return observation

    def clear_objects(self, object_names):
        """
        Clears objects with the name @object_names out of the task space. This is useful
        for supporting task modes with single types of objects, as in
        @self.single_object_mode without changing the model definition.

        Args:
            object_names (str or list of str): Name of object(s) to remove from the task workspace
        """
        object_names = {object_names} if type(object_names) is str else set(object_names)
        for obj in self.model.mujoco_objects:
            if obj.name in object_names:
                self.sim.data.set_joint_qpos(obj.joints[0], np.array((10, 10, 10, 1, 0, 0, 0)))

    def visualize(self, vis_settings):
        """
        Do any needed visualization here

        Args:
            vis_settings (dict): Visualization keywords mapped to T/F, determining whether that specific
                component should be visualized. Should have "env" keyword as well as any other relevant
                options specified.
        """
        # Set visuals for environment objects
        for obj in self.model.mujoco_objects:
            obj.set_sites_visibility(sim=self.sim, visible=vis_settings["env"])

    @property
    def _visualizations(self):
        """
        Visualization keywords for this environment

        Returns:
            set: All components that can be individually visualized for this environment
        """
        return {"env"}

    @property
    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    @property
    def action_dim(self):
        """
        Size of the action space

        Returns:
            int: Action space dimension
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """
        Reloads the environment from an XML description of the environment.

        Args:
            xml_string (str): Filepath to the xml file that will be loaded directly into the sim
        """

        # if there is an active viewer window, destroy it
        self.close()

        # Since we are reloading from an xml_string, we are deterministically resetting
        self.deterministic_reset = True

        # initialize sim from xml
        self._initialize_sim(xml_string=xml_string)

        # Now reset as normal
        self.reset()

        # Turn off deterministic reset
        self.deterministic_reset = False

    def check_contact(self, geoms_1, geoms_2=None):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1 (str or list of str or MujocoModel): an individual geom name or list of geom names or a model. If
                a MujocoModel is specified, the geoms checked will be its contact_geoms
            geoms_2 (str or list of str or MujocoModel or None): another individual geom name or list of geom names.
                If a MujocoModel is specified, the geoms checked will be its contact_geoms. If None, will check
                any collision with @geoms_1 to any other geom in the environment

        Returns:
            bool: True if any geom in @geoms_1 is in contact with any geom in @geoms_2.
        """
        # Check if either geoms_1 or geoms_2 is a string, convert to list if so
        if type(geoms_1) is str:
            geoms_1 = [geoms_1]
        elif isinstance(geoms_1, MujocoModel):
            geoms_1 = geoms_1.contact_geoms
        if type(geoms_2) is str:
            geoms_2 = [geoms_2]
        elif isinstance(geoms_2, MujocoModel):
            geoms_2 = geoms_2.contact_geoms
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2 if geoms_2 is not None else True
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2 if geoms_2 is not None else True
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                return True
        return False

    def get_contacts(self, model):
        """
        Checks for any contacts with @model (as defined by @model's contact_geoms) and returns the set of
        geom names currently in contact with that model (excluding the geoms that are part of the model itself).

        Args:
            model (MujocoModel): Model to check contacts for.

        Returns:
            set: Unique geoms that are actively in contact with this model.

        Raises:
            AssertionError: [Invalid input type]
        """
        # Make sure model is MujocoModel type
        assert isinstance(model, MujocoModel), \
            "Inputted model must be of type MujocoModel; got type {} instead!".format(type(model))
        contact_set = set()
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms; add to contact set if match is found
            g1, g2 = self.sim.model.geom_id2name(contact.geom1), self.sim.model.geom_id2name(contact.geom2)
            if g1 in model.contact_geoms and g2 not in model.contact_geoms:
                contact_set.add(g2)
            elif g2 in model.contact_geoms and g1 not in model.contact_geoms:
                contact_set.add(g1)
        return contact_set

    def _check_success(self):
        """
        Checks if the task has been completed. Should be implemented by subclasses

        Returns:
            bool: True if the task has been completed
        """
        raise NotImplementedError

    def _destroy_viewer(self):
        """
        Destroys the current mujoco renderer instance if it exists
        """
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
Example #19
0
class GatherEnv:
    @autoassign(exclude=('model_path'))
    def __init__(self,
                 model_path,
                 n_apples=8,
                 n_bombs=8,
                 apple_reward=10,
                 bomb_cost=1,
                 activity_range=6.0,
                 catch_range=10,
                 n_bins=10,
                 robot_object_spacing=2.0,
                 sensor_range=6.0,
                 sensor_span=pi):
        self.viewer = None

        self.apples = []
        self.bombs = []

        self.model = self.build_model(model_path)
        self.sim = MjSim(self.model)

        self._max_episode_steps = None
        self._step_num = 0

    def build_model(self, agent_xml_path):
        sim_size = self.activity_range + 1

        model_tree = ET.parse(agent_xml_path)
        worldbody = model_tree.find('.//worldbody')

        floor_attrs = dict(name='floor',
                           type='plane',
                           material='MatPlane',
                           pos='0 0 0',
                           size=f'{sim_size} {sim_size} {sim_size}',
                           conaffinity='1',
                           rgba='0.8 0.9 0.8 1',
                           condim='3')
        wall_attrs = dict(type='box',
                          conaffinity='1',
                          rgba='0.8 0.9 0.8 1',
                          condim='3')

        wall_poses = [f'0 {-sim_size} 0', f'0 {sim_size} 0', \
                      f'{-sim_size} 0 0', f'{sim_size} 0 0']
        wall_sizes = [f'{sim_size + 0.1} 0.1 1', f'{sim_size + 0.1} 0.1 1', \
                      f'0.1 {sim_size + 0.1} 1', f'0.1 {sim_size + 0.1} 1']

        # Add a floor to the model
        ET.SubElement(worldbody, 'geom', **floor_attrs)

        # Add walls to the model
        for i, pos, size in zip(range(4), wall_poses, wall_sizes):
            ET.SubElement(
                worldbody, 'geom',
                dict(**wall_attrs, name=f'wall{i}', pos=pos, size=size))

        model_xml = ET.tostring(model_tree.getroot(),
                                encoding='unicode',
                                method='xml')
        model = load_model_from_xml(model_xml)

        return model

    def reset(self):
        self.sim.reset()

        self.apples = []
        self.bombs = []
        obj_coords = []

        self._step_num = 0

        agent_x_pos, agent_y_pos = self.sim.data.qpos[0], self.sim.data.qpos[1]

        rand_coord = lambda: np.random.randint(-self.activity_range, self.
                                               activity_range)

        while (len(self.apples) < self.n_apples):
            x, y = rand_coord(), rand_coord()

            # Change this later to make (0, 0) the position of the agent
            if in_range(x, y, agent_x_pos, agent_y_pos,
                        self.robot_object_spacing):
                continue

            if (x, y) in obj_coords:
                continue

            self.apples.append(Object(APPLE, x, y))
            obj_coords.append((x, y))

        while (len(self.bombs) < self.n_bombs):
            x, y = rand_coord(), rand_coord()

            if in_range(x, y, 0, 0, self.robot_object_spacing):
                continue

            if (x, y) in obj_coords:
                continue

            self.bombs.append(Object(BOMB, x, y))
            obj_coords.append((x, y))

        return self._get_obs()

    def step(self, action):
        raise NotImplementedError

    def _do_simulation(self, action):
        raise NotImplementedError

    def _update_objects(self):
        n_apples = 0
        n_bombs = 0
        agent_x_pos, agent_y_pos = self.sim.data.qpos[0], self.sim.data.qpos[1]

        for apple in self.apples:
            if in_range(apple.x, apple.y, agent_x_pos, agent_y_pos,
                        self.catch_range):
                n_apples += 1
                self.apples.remove(apple)

        for bomb in self.bombs:
            if in_range(bomb.x, bomb.y, agent_x_pos, agent_y_pos,
                        self.catch_range):
                n_bombs += 1
                self.bombs.remove(bomb)

        return n_apples, n_bombs

    def _get_self_obs(self):
        raise NotImplementedError

    def _get_sensor_obs(self):
        apple_readings = np.zeros(self.n_bins)
        bomb_readings = np.zeros(self.n_bins)

        idx = self.model.body_names.index('torso')
        com = self.sim.data.subtree_com[idx].flat
        agent_x, agent_y = com[:2]

        sort_key = lambda obj: -euclidian_dist(obj.x, obj.y, agent_x, agent_y)
        sorted_objs = sorted(self.apples + self.bombs, key=sort_key)

        bin_res = self.sensor_span / self.n_bins
        half_span = self.sensor_span / 2
        orientation = std_radians(self._get_orientation())

        for obj_type, obj_x, obj_y in sorted_objs:
            dist = euclidian_dist(obj_x, obj_y, agent_x, agent_y)

            if dist > self.sensor_range:
                continue

            # Get the components of the vector from the agent to the object
            x_delta = obj_x - agent_x
            y_delta = obj_y - agent_y

            # Get the angle of the vector relative to the agent's sensor range
            angle = np.arctan2(y_delta, x_delta)
            angle = std_radians(angle - orientation + half_span)

            if angle > self.sensor_span:
                continue

            bin_number = int(angle / bin_res)

            if bin_number == int(angle / bin_res):
                bin_number -= 1

            intensity = 1.0 - dist / self.sensor_range

            if obj_type == APPLE:
                apple_readings[bin_number] = intensity
            else:
                bomb_readings[bin_number] = intensity

        sensor_obs = np.concatenate([apple_readings, bomb_readings])

        return sensor_obs

        #     dist = euclidian_dist(obj_x, obj_y, agent_x, agent_y)
        #
        #     if dist > self.sensor_range:
        #         continue
        #
        #     # Get the components of the vector from the agent to the object
        #     x_comp = obj_x - agent_x
        #     y_comp = obj_y - agent_y
        #
        #     # Get the angle of the vector relative to the agent's sensor range
        #     angle = np.arctan2(y_comp, x_comp)
        #     angle = standardize_radians(angle - orientation)
        #
        #     # Skip if object is outside sensor span
        #     if angle > half_span and angle < 2 * pi - half_span:
        #         continue
        #
        #     # Standardize angle to range [0, pi] to more easily find bin number
        #     angle = standardize_radians(angle + half_span)
        #     bin_number = int(angle / bin_res)
        #
        #     # The object is exactly on the upper bound of the sensor span
        #     if bin_number == self.n_bins:
        #         bin_number -= 1
        #
        #     intensity = 1.0 - dist / self.sensor_range
        #
        #     if obj_type == APPLE:
        #         apple_readings[bin_number] = intensity
        #     else:
        #         bomb_readings[bin_number] = intensity
        #
        # sensor_obs = np.concatenate([apple_readings, bomb_readings])
        #
        # return sensor_obs

    def _get_obs(self):
        return np.concatenate([self._get_self_obs(), self._get_sensor_obs()])

    def _get_orientation(self):
        raise NotImplementedError

    def _is_done(self):
        raise NotImplementedError

    def _unhealthy_cost(self):
        raise NotImplementedError

    def render(self):
        if not self.viewer:
            self.viewer = MjViewer(self.sim)

        objects = self.apples + self.bombs

        for object in objects:
            x, y = object.x, object.y
            rgba = APPLE_RGBA if object.type is APPLE else BOMB_RGBA
            self.viewer.add_marker(type=GEOM_SPHERE,
                                   pos=np.asarray([x, y, 0.5]),
                                   rgba=rgba,
                                   size=np.asarray([0.5] * 3))

        self.viewer.render()
Example #20
0
class HopperEnv:
    def __init__(
        self,
        robot_folders,
        robot_dir,
        substeps,
        train=True,
        with_embed=None,
        with_kin=None,
        with_dyn=None,
    ):
        self.with_embed = with_embed
        self.with_kin = with_kin
        self.with_dyn = with_dyn
        if self.with_kin:
            norm_file = 'stats/kin_stats.json'
            with open(norm_file, 'r') as f:
                stats = json.load(f)
            self.kin_mu = np.array(stats['mu']).reshape(-1)
            self.kin_simga = np.array(stats['sigma']).reshape(-1)

        if self.with_dyn:
            norm_file = 'stats/dyn_stats.json'
            with open(norm_file, 'r') as f:
                stats = json.load(f)
            self.dyn_mu = np.array(stats['mu']).reshape(-1)
            self.dyn_sigma = np.array(stats['sigma']).reshape(-1)
            self.dyn_min = np.array(stats['min']).reshape(-1)
            self.dyn_max = np.array(stats['max']).reshape(-1)

        self.metadata = {}
        self.viewer = None
        self.reward_range = (-np.inf, np.inf)
        self.nsubsteps = substeps
        self.spec = None
        self.seed()
        self.bodies = ['torso', 'thigh', 'leg', 'foot']
        self.links = ['torso_geom', 'thigh_geom', 'leg_geom', 'foot_geom']

        self.robots = []
        for folder in robot_folders:
            self.robots.append(os.path.join(robot_dir, folder))

        self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)}
        self.robot_num = len(self.robots)
        self.robot_id = 0
        self.reset_robot(self.robot_id)

        if train:
            # in training, we used the last 100 robots
            # as the testing robots (validation)
            # and the first 100 robots as the train_test robots
            # these robots are used to generate the
            # learning curves in training time
            self.test_robot_num = min(100, self.robot_num)
            train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(range(train_robot_num, self.robot_num))
            self.train_test_robot_num = min(self.test_robot_num,
                                            train_robot_num)
            self.train_test_robot_ids = list(range(self.train_test_robot_num))
            self.train_test_conditions = self.train_test_robot_num
        else:
            self.test_robot_num = self.robot_num
            self.test_robot_ids = list(range(self.robot_num))

        self.test_conditions = self.test_robot_num
        self.train_robot_num = self.robot_num - self.test_robot_num \
            if not self.with_embed else self.robot_num
        print('Train robots: ', self.train_robot_num)
        print('Test robots: ', self.test_robot_num)
        bounds = self.sim.model.actuator_ctrlrange
        self.ctrl_low = bounds[:, 0]
        self.ctrl_high = bounds[:, 1]
        self.scaling = (self.ctrl_high - self.ctrl_low) * 0.5
        self.action_space = spaces.Box(self.ctrl_low,
                                       self.ctrl_high,
                                       dtype=np.float32)
        observation = self.get_obs()
        self.obs_dim = observation.size
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)
        self.ep_reward = 0
        self.ep_len = 0
        self.reset(robot_id=0)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(
            low=-.005, high=.005, size=self.model.nv)
        self.set_state(qpos, qvel)
        return self.get_obs()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and\
               qvel.shape == (self.model.nv,)
        old_state = self.sim.get_state()
        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
                                         old_state.act, old_state.udd_state)
        self.sim.set_state(new_state)
        self.sim.forward()

    def reset_robot(self, robot_id):
        self.robot_folder_id = self.dir2id[self.robots[robot_id]]
        robot_file = os.path.join(self.robots[robot_id], 'robot.xml')
        self.model = load_model_from_path(robot_file)
        self.sim = MjSim(self.model, nsubsteps=self.nsubsteps)
        self.sim.reset()
        if self.viewer is not None:
            self.viewer = MjViewer(self.sim)
        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()

    def reset(self, robot_id=None):
        if robot_id is None:
            self.robot_id = self.np_random.randint(0, self.train_robot_num,
                                                   1)[0]
        else:
            self.robot_id = robot_id
        self.reset_robot(self.robot_id)
        ob = self.reset_model()
        self.ep_reward = 0
        self.ep_len = 0
        return ob

    def scale_action(self, action):
        act_k = (self.action_space.high - self.action_space.low) / 2.
        act_b = (self.action_space.high + self.action_space.low) / 2.
        return act_k * action + act_b

    def test_reset(self, cond):
        if cond >= len(self.test_robot_ids):
            return np.full((self.obs_dim), np.nan)
        robot_id = self.test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def train_test_reset(self, cond):
        if cond >= len(self.train_test_robot_ids):
            return np.full((self.obs_dim), np.nan)
        robot_id = self.train_test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def step(self, a):
        a = self.scale_action(a)
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a)
        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        s = self.state_vector()
        done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
                    (height > .7) and (abs(ang) < .2))
        ob = self.get_obs()
        self.ep_reward += reward
        self.ep_len += 1
        info = {
            'reward_so_far': self.ep_reward,
            'steps_so_far': self.ep_len,
            'reward': reward,
            'step': 1
        }
        return ob, reward, done, info

    @property
    def dt(self):
        return self.model.opt.timestep * self.nsubsteps

    def state_vector(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    def do_simulation(self, ctrl):
        self.sim.data.ctrl[:] = ctrl
        self.sim.step()

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()

    def get_obs(self):
        ob = np.concatenate([
            self.sim.data.qpos.flat[1:],
            np.clip(self.sim.data.qvel.flat, -10, 10)
        ])
        if self.with_kin:
            link_length = self.get_link_length(self.sim)
            link_length = np.divide((link_length - self.kin_mu),
                                    self.kin_simga)
            ob = np.concatenate([ob, link_length])
        if self.with_dyn:
            body_mass = self.get_body_mass(self.sim)
            joint_friction = self.get_friction(self.sim)
            joint_damping = self.get_damping(self.sim)
            armature = self.get_armature(self.sim)
            dyn_vec = np.concatenate(
                (body_mass, joint_friction, joint_damping, armature))
            # dyn_vec = np.divide((dyn_vec - self.dyn_mu), self.dyn_simga)
            dyn_vec = np.divide((dyn_vec - self.dyn_min),
                                self.dyn_max - self.dyn_min)
            ob = np.concatenate([ob, dyn_vec])
        if self.with_embed:
            ob = np.concatenate((ob, np.array([self.robot_folder_id])))
        return ob

    def get_link_length(self, sim):
        link_length = []
        for link in self.links:
            geom_id = sim.model._geom_name2id[link]
            link_length.append(sim.model.geom_size[geom_id][1])
        return np.asarray(link_length).reshape(-1)

    def get_damping(self, sim):
        return sim.model.dof_damping[3:].reshape(-1)

    def get_friction(self, sim):
        return sim.model.dof_frictionloss[3:].reshape(-1)

    def get_body_mass(self, sim):
        body_mass = []
        for body in self.bodies:
            body_id = sim.model._body_name2id[body]
            body_mass.append(sim.model.body_mass[body_id])
        return np.asarray(body_mass).reshape(-1)

    def get_armature(self, sim):
        return sim.model.dof_armature[3:].reshape(-1)

    def close(self):
        pass
Example #21
0
class MujocoEnv(gym.Env):
    def __init__(self,
                 model_path,
                 frame_skip=1,
                 action_noise=0.0,
                 random_init_state=True):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)

        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        self.init_qacc = self.data.qacc.ravel().copy()
        self.init_ctrl = self.data.ctrl.ravel().copy()

        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size

        self.action_noise = action_noise
        self.random_init_state = random_init_state
        """
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos

        """
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    def action_space(self):
        bounds = self.model.actuator_ctrlrange.copy()
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.low, self.action_space.high

    def reset_mujoco(self, init_state=None):
        if init_state is None:
            if self.random_init_state:
                qp = self.init_qpos.copy() + \
                     np.random.normal(size=self.init_qpos.shape) * 0.01
                qv = self.init_qvel.copy() + \
                     np.random.normal(size=self.init_qvel.shape) * 0.1
            else:
                qp = self.init_qpos.copy()
                qv = self.init_qvel.copy()

            qacc = self.init_qacc.copy()
            ctrl = self.init_ctrl.copy()

        else:
            pass
            """
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start: start + datum_dim]
                setattr(self.model.data, datum_name, datum)
                start += datum_dim
            """
        self.set_state(qp, qv)

    def reset(self, init_state=None):

        # self.reset_mujoco(init_state)
        self.sim.reset()
        self.sim.forward()

        self.current_com = self.data.subtree_com[0]
        self.dcom = np.zeros_like(self.current_com)

        return self.get_current_obs()

    def set_state(self, qpos, qvel, qacc):
        assert qpos.shape == (self.qpos_dim, ) and qvel.shape == (
            self.qvel_dim, ) and qacc.shape == (self.qacc_dim, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.model.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.data.qpos,
            self.data.qvel,
            self.data.qacc,
            self.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        ctrl = self.inject_action_noise(action)
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]

        for _ in range(self.frame_skip):
            self.sim.step()

        new_com = self.data.subtree_com[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self, config=None):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            # self.viewer.start()
            # self.viewer.set_model(self.model)
        if config is not None:
            pass
            # self.viewer.set_window_pose(config["xpos"], config["ypos"])
            # self.viewer.set_window_size(config["width"], config["height"])
            # self.viewer.set_window_title(config["title"])
        return self.viewer

    def render(self, close=False, mode='human', config=None):
        if mode == 'human':
            # viewer = self.get_viewer(config=config)
            try:
                self.viewer.render()

            except:
                self.get_viewer(config=config)
                self.viewer.render()
        elif mode == 'rgb_array':
            viewer = self.get_viewer(config=config)
            viewer.loop_once()
            # self.get_viewer(config=config).render()
            data, width, height = self.get_viewer(config=config).get_image()
            return np.fromstring(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        if close:
            self.stop_viewer()

    # def start_viewer(self):
    #     viewer = self.get_viewer()
    #     if not viewer.running:
    #         viewer.start()
    #
    # def stop_viewer(self):
    #     if self.viewer:
    #         self.viewer.finish()
    #         self.viewer = None

    # def release(self):
    #     # temporarily alleviate the issue (but still some leak)
    #     from learning_to_adapt.mujoco_py.mjlib import mjlib
    #     mjlib.mj_deleteModel(self.model._wrapped)
    #     mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.ximat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)

        ## _compute_subtree
        body_vels = np.zeros((self.model.nbody, 6))
        # bodywise quantities
        mass = self.model.body_mass.flatten()
        for i in range(self.model.nbody):
            # body velocity
            # Compute object 6D velocity in object-centered frame, world/local orientation.
            # mj_objectVelocity(const mjModel* m, const mjData* d, int objtype, int objid, mjtMum* res, int flg_local)
            mujoco_py.cymj._mj_objectVelocity(self.model, self.data, 1, i,
                                              body_vels[i], 0)
        lin_moms = body_vels[:, 3:] * mass.reshape((-1, 1))

        # init subtree mass
        body_parentid = self.model.body_parentid
        # subtree com and com_vel
        for i in range(self.model.nbody - 1, -1, -1):
            if i > 0:
                parent = body_parentid[i]
                # add scaled velocities
                lin_moms[parent] += lin_moms[i]
                # accumulate mass
                mass[parent] += mass[i]
        return_ = lin_moms / mass.reshape((-1, 1))
        return return_[idx]

        # return self.model.body_comvels[idx]

    # def get_body_comvel(self, body_name):
    #     idx = self.model.body_names.index(body_name)
    #
    #     return self.model.body_comvels[idx]

    # def print_stats(self):
    #     super(MujocoEnv, self).print_stats()
    #     print("qpos dim:\t%d" % len(self.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError

    # def set_state_tmp(self, state, restore=True):
    #     if restore:
    #         prev_pos = self.data.qpos
    #         prev_qvel = self.data.qvel
    #         prev_ctrl = self.data.ctrl
    #         prev_act = self.data.act
    #     qpos, qvel = self.decode_state(state)
    #     self.model.data.qpos = qpos
    #     self.model.data.qvel = qvel
    #     self.model.forward()
    #     yield
    #     if restore:
    #         self.data.qpos = prev_pos
    #         self.data.qvel = prev_qvel
    #         self.data.ctrl = prev_ctrl
    #         self.data.act = prev_act
    #         self.model.forward()

    def get_param_values(self):
        return {}

    def set_param_values(self, values):
        pass
Example #22
0
def test_jacobians():
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="1.0"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
                    <geom name="geom2" pos="1 1 1" size="0.5"/>
                    <site name="target" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in bad array (long instead of double)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in bad array
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
Example #23
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        # assert not done
        self.obs_dim = observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized and after every reset
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        ob = self.reset_model()
        if self.viewer is not None:
            # self.viewer.autoscale()
            self.viewer_setup()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        self.data.qpos[:] = qpos
        self.data.qvel[:] = qvel
        # self.sim._compute_subtree()  # pylint: disable=W0212
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        t0 = time.time()
        self.data.ctrl[:] = ctrl
        for _ in range(n_frames):
            self.sim.step()
        if self.viewer is not None:
            self.viewer.sim_time = time.time() - t0

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                # self._get_viewer().finish()
                self.viewer = None
            return

        if mode == 'rgb_array':
            self._get_viewer().render()
            data, width, height = self._get_viewer().get_image()
            return np.fromstring(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        elif mode == 'human':
            self._get_viewer().render()

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = mujoco_py.MjViewer(self.sim)
            # self.viewer.start()
            # self.viewer.set_model(self.model)
        self.viewer_setup()
        return self.viewer

    def get_body_com(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.body_xpos[idx]

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.body_xvelp[idx]

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.body_xmat[idx].reshape((3, 3))

    def state_vector(self):
        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
Example #24
0
class RobotEnv(object):
    def __init__(self):
        self.model = load_model_from_path(XML_PATH)
        self.sim = MjSim(self.model)
        self.viewer = MjViewer(self.sim)

        # finger instances
        self.front_finger = RollerFinger(idx=0,
                                         name='front',
                                         pos_base=np.array([0, 0.048, 0.085]),
                                         base_normal=np.array([0., -1., 0.]),
                                         base_horizontal=np.array([1., 0.,
                                                                   0.]))
        self.left_finger = RollerFinger(
            idx=1,
            name='left',
            pos_base=np.array([-0.04157, -0.024, 0.085]),
            base_normal=np.array([0.8660, 0.5, 0.]),
            base_horizontal=np.array([-0.5, 0.8660, 0.]))
        self.right_finger = RollerFinger(
            idx=2,
            name='right',
            pos_base=np.array([0.04157, -0.024, 0.085]),
            base_normal=np.array([-0.8660, 0.5, 0.]),
            base_horizontal=np.array([-0.5, -0.8660, 0.]))

        self.r_fingers = [
            self.front_finger, self.left_finger, self.right_finger
        ]

        # object target orientation in angle-axis representation
        self.axis = normalize_vec(np.array([0., 1., 1.]))
        self.angle = deg_to_rad(90)

        self.init_box_pos = np.array([0.0, 0.0, 0.2])  # obj initial pos
        self.target_box_pos = np.array([0.0, 0.0, 0.2])  # obj target pos

        self.max_step = 1500
        self.termination = False
        self.success = False
        self.timestep = 0

        self.k_vw = 0.5
        self.k_vv = 0.3

        self.reset()

    def reset(self, target_axis=np.array([0., 1., 1.]), target_angle=90):

        self.sim.reset()

        # Reset finger positions
        for fg in self.r_fingers:
            self.sim.data.qpos[fg.idx * 3] = fg.init_base_angle  # base angles
            self.sim.data.qpos[fg.idx * 3 + 1] = 0  # pivot angles

        my_map = [0, 3, 6, 1, 4, 7, 2, 5,
                  8]  # qpos and ctrl have different joint orders (see xml)
        for ii in range(9):
            self.sim.data.ctrl[ii] = self.sim.data.qpos[my_map[ii]]
        self.sim.step()

        self.termination = False
        self.success = False
        self.timestep = 0

        # reset target
        if target_axis is not None:
            self.axis = normalize_vec(target_axis)
        if target_angle is not None:
            self.angle = deg_to_rad(target_angle)
        self.quat_target = np.array([
            np.cos(self.angle / 2), self.axis[0] * np.sin(self.angle / 2),
            self.axis[1] * np.sin(self.angle / 2),
            self.axis[2] * np.sin(self.angle / 2)
        ])
        self.curr = self.sim.data.sensordata[
            -7:]  # quat_to_rot(self.sim.data.sensordata[-4:])
        self.rot_matrix_target = R.from_quat(quat_to_quat(
            self.quat_target)).as_matrix()
        self.test_min_err = 100

        self.session_name = str(self.axis) + ', ' + str(rad_to_deg(self.angle))
        return self.sim.data.sensordata

    def get_expert_action(self):
        # Sensor data
        curr_data = self.sim.data.sensordata
        self.cube_pos = curr_data[-7:-4]  # obj position
        self.cube_orientation = curr_data[-4:]  # obj orientation

        # compute each finger
        for fg in self.r_fingers:
            fg.base_rad = self.sim.data.qpos[fg.idx * 3]
            fg.q_pivot_prev = self.sim.data.qpos[fg.idx * 3 + 1]
            fg.q_pivot, fg.dq_roller = compute_pivot_and_roller(
                k_vw=self.k_vw,
                k_vv=self.k_vv,
                base_angle=fg.base_rad,
                pos_obj_curr=self.cube_pos,
                pos_obj_target=self.target_box_pos,
                ori_obj_curr=self.cube_orientation,
                ori_obj_target=self.quat_target,
                r_roller=fg.roller_radius,
                finger_length=fg.dist_pivot_to_base,
                pos_base=fg.pos_base,
                base_axis=fg.base_horizontal,
                finger_normal=fg.base_normal)

            dq_pivot = np.clip(fg.q_pivot - fg.q_pivot_prev, -fg.q_pivot_limit,
                               fg.q_pivot_limit)
            fg.q_pivot = fg.q_pivot_prev + dq_pivot

            self.sim.data.ctrl[fg.idx] = -fg.init_base_angle
            self.sim.data.ctrl[3 + fg.idx] = fg.pivot_gear_ratio * fg.q_pivot
            self.sim.data.ctrl[6 + fg.idx] += fg.dq_roller

        action = np.copy(self.sim.data.ctrl)
        return action

    def step(self, action):
        for i in range(len(action)):
            self.sim.data.ctrl[i] = action[i]

        self.viewer.add_overlay(const.GRID_TOPRIGHT, " ", self.session_name)
        self.viewer.render()
        self.sim.step()
        self.timestep += 1

        obs = self.sim.data.sensordata
        curr = obs[-7:]  # object pose and orientation

        err_curr_rot = get_quat_error(curr[3:], self.quat_target)
        err_curr_pos = get_pos_error(curr[:3], self.target_box_pos)
        err_curr = SCALE_ERROR_ROT * err_curr_rot + SCALE_ERROR_POS * err_curr_pos
        self.test_min_err = min(self.test_min_err, err_curr_rot)
        if err_curr < 15:
            self.termination = True
            self.success = True
        else:
            if self.timestep > self.max_step or err_curr_pos > 0.05:
                self.termination = True
                self.success = False
        #if self.termination:
        #    print("target axis",self.axis,"success",self.success,"min_error",self.test_min_err)
        return obs, self.termination, self.success
Example #25
0
def test_jacobians():
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="1.0"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
                    <geom name="geom2" pos="1 1 1" size="0.5"/>
                    <site name="target" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in bad array (long instead of double)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in bad array
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
Example #26
0
class MujocoEnv(metaclass=EnvMeta):
    """
    Initializes a Mujoco Environment.
    Args:
        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.
        has_offscreen_renderer (bool): True if using off-screen rendering.
        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse
        render_collision_mesh (bool): True if rendering collision meshes
            in camera. False otherwise.
        render_visual_mesh (bool): True if rendering visual meshes
            in camera. False otherwise.
        render_gpu_device_id (int): corresponds to the GPU device id to use for offscreen rendering.
            Defaults to -1, in which case the device will be inferred from environment variables
            (GPUS or CUDA_VISIBLE_DEVICES).
        control_freq (float): how many control signals to receive
            in every simulated second. This sets the amount of simulation time
            that passes between every action input.
        horizon (int): Every episode lasts for exactly @horizon timesteps.
        ignore_done (bool): True if never terminating the environment (ignore @horizon).
        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables
        renderer (str): string for the renderer to use
        renderer_config (dict): dictionary for the renderer configurations
    Raises:
        ValueError: [Invalid renderer selection]
    """

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        render_gpu_device_id=-1,
        control_freq=20,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        renderer="mujoco",
        renderer_config=None,
    ):
        # First, verify that both the on- and off-screen renderers are not being used simultaneously
        if has_renderer is True and has_offscreen_renderer is True:
            raise ValueError("the onscreen and offscreen renderers cannot be used simultaneously.")

        # Rendering-specific attributes
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_camera = render_camera
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.render_gpu_device_id = render_gpu_device_id
        self.viewer = None

        # Simulation-specific attributes
        self._observables = {}                      # Maps observable names to observable objects
        self._obs_cache = {}                        # Maps observable names to pre-/partially-computed observable values
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.hard_reset = hard_reset
        self._model_postprocessor = None            # Function to post-process model after load_model() call
        self.model = None
        self.cur_time = None
        self.model_timestep = None
        self.control_timestep = None
        self.deterministic_reset = False            # Whether to add randomized resetting of objects / robot joints

        self.renderer = renderer
        self.renderer_config = renderer_config

        # Load the model
        self._load_model()

        # Post-process model
        self._postprocess_model()

        # Initialize the simulation
        self._initialize_sim()

        #initializes the rendering
        self.initialize_renderer()

        # Run all further internal (re-)initialization required
        self._reset_internal()

        # Load observables
        if hasattr(self.viewer, '_setup_observables'):
            self._observables = self.viewer._setup_observables()
        else:
            self._observables = self._setup_observables()

        # check if viewer has get observations method and set a flag for future use.
        self.viewer_get_obs = hasattr(self.viewer, '_get_observations')

    def initialize_renderer(self):
        self.renderer = self.renderer.lower()

        if self.renderer_config is None and self.renderer != 'mujoco':
            self.renderer_config = load_renderer_config(self.renderer)

        if self.renderer == 'mujoco' or self.renderer == 'default':
            pass
        elif self.renderer == 'nvisii':
            from robosuite.renderers.nvisii.nvisii_renderer import NVISIIRenderer

            self.viewer = NVISIIRenderer(env=self,
                                         **self.renderer_config)
        elif self.renderer == 'igibson':
            from robosuite.renderers.igibson.igibson_renderer import iGibsonRenderer

            self.viewer = iGibsonRenderer(env=self,
                                         **self.renderer_config
                                         )
        else:
            raise ValueError(f'{self.renderer} is not a valid renderer name. Valid options include default (native mujoco renderer), nvisii, and igibson')

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.
        Args:
            control_freq (float): Hz rate to run control loop at within the simulation
        """
        self.cur_time = 0
        self.model_timestep = macros.SIMULATION_TIMESTEP
        if self.model_timestep <= 0:
            raise ValueError("Invalid simulation timestep defined!")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError("Control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def set_model_postprocessor(self, postprocessor):
        """
        Sets the post-processor function that self.model will be passed to after load_model() is called during resets.
        Args:
            postprocessor (None or function): If set, postprocessing method should take in a Task-based instance and
                return no arguments.
        """
        self._model_postprocessor = postprocessor

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _postprocess_model(self):
        """
        Post-processes model after load_model() call. Useful for external objects (e.g.: wrappers) to
        be able to modify the sim model before it is actually loaded into the simulation
        """
        if self._model_postprocessor is not None:
            self._model_postprocessor(self.model)

    def _setup_references(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        # Setup mappings from model to IDs
        self.model.generate_id_mappings(sim=self.sim)

    def _setup_observables(self):
        """
        Sets up observables to be used for this environment.
        Returns:
            OrderedDict: Dictionary mapping observable names to its corresponding Observable object
        """
        return OrderedDict()

    def _initialize_sim(self, xml_string=None):
        """
        Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created
        from the specified xml_string. Else, it will pull from self.model to instantiate the simulation
        Args:
            xml_string (str): If specified, creates MjSim object from this filepath
        """
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = load_model_from_xml(xml_string) if xml_string else self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.forward()

        # Setup sim time based on control frequency
        self.initialize_time(self.control_freq)

    def reset(self):
        """
        Resets simulation.
        Returns:
            OrderedDict: Environment observation space after reset occurs
        """
        # TODO(yukez): investigate black screen of death
        # Use hard reset if requested

        if self.hard_reset and not self.deterministic_reset:
            if self.renderer == 'mujoco' or self.renderer == 'default':
                self._destroy_viewer()
            self._load_model()
            self._postprocess_model()
            self._initialize_sim()
        # Else, we only reset the sim internally
        else:
            self.sim.reset()
        
        # Reset necessary robosuite-centric variables
        self._reset_internal()
        self.sim.forward()
        # Setup observables, reloading if
        self._obs_cache = {}
        if self.hard_reset:
            # If we're using hard reset, must re-update sensor object references
            _observables = self._setup_observables()
            for obs_name, obs in _observables.items():
                self.modify_observable(observable_name=obs_name, attribute="sensor", modifier=obs._sensor)
        # Make sure that all sites are toggled OFF by default
        self.visualize(vis_settings={vis: False for vis in self._visualizations})

        if self.viewer is not None and self.renderer != 'mujoco':
            self.viewer.reset()
        
        observations = self.viewer._get_observations(force_update=True) if self.viewer_get_obs else self._get_observations(force_update=True)

        # Return new observations
        return observations

    def _reset_internal(self):
        """Resets simulation internal configurations."""

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.viewer.viewer.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

            # make sure mujoco-py doesn't block rendering frames
            # (see https://github.com/StanfordVL/robosuite/issues/39)
            self.viewer.viewer._render_every_frame = True

            # Set the camera angle for viewing
            if self.render_camera is not None:
                self.viewer.set_camera(camera_id=self.sim.model.camera_name2id(self.render_camera))

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim, device_id=self.render_gpu_device_id)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._setup_references()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # Empty observation cache and reset all observables
        self._obs_cache = {}
        for observable in self._observables.values():
            observable.reset()

    def _update_observables(self, force=False):
        """
        Updates all observables in this environment
        Args:
            force (bool): If True, will force all the observables to update their internal values to the newest
                value. This is useful if, e.g., you want to grab observations when directly setting simulation states
                without actually stepping the simulation.
        """
        for observable in self._observables.values():
            observable.update(timestep=self.model_timestep, obs_cache=self._obs_cache, force=force)

    def _get_observations(self, force_update=False):
        """
        Grabs observations from the environment.
        Args:
            force_update (bool): If True, will force all the observables to update their internal values to the newest
                value. This is useful if, e.g., you want to grab observations when directly setting simulation states
                without actually stepping the simulation.
        Returns:
            OrderedDict: OrderedDict containing observations [(name_string, np.array), ...]
        """
        observations = OrderedDict()
        obs_by_modality = OrderedDict()

        # Force an update if requested
        if force_update:
            self._update_observables(force=True)

        # Loop through all observables and grab their current observation
        for obs_name, observable in self._observables.items():
            if observable.is_enabled() and observable.is_active():
                obs = observable.obs
                observations[obs_name] = obs
                modality = observable.modality + "-state"
                if modality not in obs_by_modality:
                    obs_by_modality[modality] = []
                # Make sure all observations are numpy arrays so we can concatenate them
                array_obs = [obs] if type(obs) in {int, float} or not obs.shape else obs
                obs_by_modality[modality].append(np.array(array_obs))

        # Add in modality observations
        for modality, obs in obs_by_modality.items():
            # To save memory, we only concatenate the image observations if explicitly requested
            if modality == "image-state" and not macros.CONCATENATE_IMAGES:
                continue
            observations[modality] = np.concatenate(obs, axis=-1)

        return observations

    def step(self, action):
        """
        Takes a step in simulation with control command @action.
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            4-tuple:
                - (OrderedDict) observations from the environment
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) misc information
        Raises:
            ValueError: [Steps past episode termination]
        """
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1

        # Since the env.step frequency is slower than the mjsim timestep frequency, the internal controller will output
        # multiple torque commands in between new high level action commands. Therefore, we need to denote via
        # 'policy_step' whether the current step we're taking is simply an internal update of the controller,
        # or an actual policy update
        policy_step = True

        # Loop through the simulation at the model timestep rate until we're ready to take the next policy step
        # (as defined by the control frequency specified at the environment level)
        for i in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self._pre_action(action, policy_step)
            self.sim.step()
            self._update_observables()
            policy_step = False

        # Note: this is done all at once to avoid floating point inaccuracies
        self.cur_time += self.control_timestep

        reward, done, info = self._post_action(action)

        if self.viewer is not None and self.renderer != 'mujoco':
            self.viewer.update()

        observations = self.viewer._get_observations() if self.viewer_get_obs else self._get_observations()
        return observations, reward, done, info

    def _pre_action(self, action, policy_step=False):
        """
        Do any preprocessing before taking an action.
        Args:
            action (np.array): Action to execute within the environment
            policy_step (bool): Whether this current loop is an actual policy step or internal sim update step
        """
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """
        Do any housekeeping after taking an action.
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            3-tuple:
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method
        """
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done

        return reward, self.done, {}

    def reward(self, action):
        """
        Reward should be a function of state and action
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            float: Reward from environment
        """
        raise NotImplementedError

    def render(self):
        """
        Renders to an on-screen window.
        """
        self.viewer.render()

    def get_pixel_obs(self):
        """
        Gets the pixel observations for the environment from the specified renderer
        """
        self.viewer.get_pixel_obs()

    def close_renderer(self):
        """
        Closes the renderer
        """
        self.viewer.close()

    def observation_spec(self):
        """
        Returns an observation as observation specification.
        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        Returns:
            OrderedDict: Observations from the environment
        """
        observation = self.viewer._get_observations() if self.viewer_get_obs else self._get_observations()
        return observation

    def clear_objects(self, object_names):
        """
        Clears objects with the name @object_names out of the task space. This is useful
        for supporting task modes with single types of objects, as in
        @self.single_object_mode without changing the model definition.
        Args:
            object_names (str or list of str): Name of object(s) to remove from the task workspace
        """
        object_names = {object_names} if type(object_names) is str else set(object_names)
        for obj in self.model.mujoco_objects:
            if obj.name in object_names:
                self.sim.data.set_joint_qpos(obj.joints[0], np.array((10, 10, 10, 1, 0, 0, 0)))

    def visualize(self, vis_settings):
        """
        Do any needed visualization here
        Args:
            vis_settings (dict): Visualization keywords mapped to T/F, determining whether that specific
                component should be visualized. Should have "env" keyword as well as any other relevant
                options specified.
        """
        # Set visuals for environment objects
        for obj in self.model.mujoco_objects:
            obj.set_sites_visibility(sim=self.sim, visible=vis_settings["env"])

    def set_camera_pos_quat(self, camera_pos, camera_quat):
        if self.renderer in ["nvisii", "igibson"]:
            self.viewer.set_camera_pos_quat(camera_pos, camera_quat)
        else:
            raise AttributeError('setting camera position and quat requires renderer to be either NVISII or iGibson.')

    def reset_from_xml_string(self, xml_string):
        """
        Reloads the environment from an XML description of the environment.
        Args:
            xml_string (str): Filepath to the xml file that will be loaded directly into the sim
        """

        # if there is an active viewer window, destroy it
        if self.renderer != 'nvisii':
            self.close()

        # Since we are reloading from an xml_string, we are deterministically resetting
        self.deterministic_reset = True

        # initialize sim from xml
        self._initialize_sim(xml_string=xml_string)

        # Now reset as normal
        self.reset()

        # Turn off deterministic reset
        self.deterministic_reset = False

    def check_contact(self, geoms_1, geoms_2=None):
        """
        Finds contact between two geom groups.
        Args:
            geoms_1 (str or list of str or MujocoModel): an individual geom name or list of geom names or a model. If
                a MujocoModel is specified, the geoms checked will be its contact_geoms
            geoms_2 (str or list of str or MujocoModel or None): another individual geom name or list of geom names.
                If a MujocoModel is specified, the geoms checked will be its contact_geoms. If None, will check
                any collision with @geoms_1 to any other geom in the environment
        Returns:
            bool: True if any geom in @geoms_1 is in contact with any geom in @geoms_2.
        """
        # Check if either geoms_1 or geoms_2 is a string, convert to list if so
        if type(geoms_1) is str:
            geoms_1 = [geoms_1]
        elif isinstance(geoms_1, MujocoModel):
            geoms_1 = geoms_1.contact_geoms
        if type(geoms_2) is str:
            geoms_2 = [geoms_2]
        elif isinstance(geoms_2, MujocoModel):
            geoms_2 = geoms_2.contact_geoms
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2 if geoms_2 is not None else True
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2 if geoms_2 is not None else True
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                return True
        return False

    def get_contacts(self, model):
        """
        Checks for any contacts with @model (as defined by @model's contact_geoms) and returns the set of
        geom names currently in contact with that model (excluding the geoms that are part of the model itself).
        Args:
            model (MujocoModel): Model to check contacts for.
        Returns:
            set: Unique geoms that are actively in contact with this model.
        Raises:
            AssertionError: [Invalid input type]
        """
        # Make sure model is MujocoModel type
        assert isinstance(model, MujocoModel), \
            "Inputted model must be of type MujocoModel; got type {} instead!".format(type(model))
        contact_set = set()
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms; add to contact set if match is found
            g1, g2 = self.sim.model.geom_id2name(contact.geom1), self.sim.model.geom_id2name(contact.geom2)
            if g1 in model.contact_geoms and g2 not in model.contact_geoms:
                contact_set.add(g2)
            elif g2 in model.contact_geoms and g1 not in model.contact_geoms:
                contact_set.add(g1)
        return contact_set

    def add_observable(self, observable):
        """
        Adds an observable to this environment.
        Args:
            observable (Observable): Observable instance.
        """
        assert observable.name not in self._observables,\
            "Observable name {} is already associated with an existing observable! Use modify_observable(...) " \
            "to modify a pre-existing observable.".format(observable.name)
        self._observables[observable.name] = observable

    def modify_observable(self, observable_name, attribute, modifier):
        """
        Modifies observable with associated name @observable_name, replacing the given @attribute with @modifier.
        Args:
             observable_name (str): Observable to modify
             attribute (str): Observable attribute to modify.
                Options are {`'sensor'`, `'corrupter'`,`'filter'`,  `'delayer'`, `'sampling_rate'`,
                `'enabled'`, `'active'`}
             modifier (any): New function / value to replace with for observable. If a function, new signature should
                match the function being replaced.
        """
        # Find the observable
        assert observable_name in self._observables, "No valid observable with name {} found. Options are: {}".\
            format(observable_name, self.observation_names)
        obs = self._observables[observable_name]
        # replace attribute accordingly
        if attribute == "sensor":
            obs.set_sensor(modifier)
        elif attribute == "corrupter":
            obs.set_corrupter(modifier)
        elif attribute == "filter":
            obs.set_filter(modifier)
        elif attribute == "delayer":
            obs.set_delayer(modifier)
        elif attribute == "sampling_rate":
            obs.set_sampling_rate(modifier)
        elif attribute == "enabled":
            obs.set_enabled(modifier)
        elif attribute == "active":
            obs.set_active(modifier)
        else:
            # Invalid attribute specified
            raise ValueError("Invalid observable attribute specified. Requested: {}, valid options are {}".
                             format(attribute, {"sensor", "corrupter", "filter", "delayer",
                                                "sampling_rate", "enabled", "active"}))

    def _check_success(self):
        """
        Checks if the task has been completed. Should be implemented by subclasses
        Returns:
            bool: True if the task has been completed
        """
        raise NotImplementedError

    def _destroy_viewer(self):
        """
        Destroys the current mujoco renderer instance if it exists
        """
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()

    @property
    def observation_modalities(self):
        """
        Modalities for this environment's observations
        Returns:
            set: All observation modalities
        """
        return set([observable.modality for observable in self._observables.values()])

    @property
    def observation_names(self):
        """
        Grabs all names for this environment's observables
        Returns:
            set: All observation names
        """
        return set(self._observables.keys())

    @property
    def enabled_observables(self):
        """
        Grabs all names of enabled observables for this environment. An observable is considered enabled if its values
        are being continually computed / updated at each simulation timestep.
        Returns:
            set: All enabled observation names
        """
        return set([name for name, observable in self._observables.items() if observable.is_enabled()])

    @property
    def active_observables(self):
        """
        Grabs all names of active observables for this environment. An observable is considered active if its value is
        being returned in the observation dict from _get_observations() call or from the step() call (assuming this
        observable is enabled).
        Returns:
            set: All active observation names
        """
        return set([name for name, observable in self._observables.items() if observable.is_active()])

    @property
    def _visualizations(self):
        """
        Visualization keywords for this environment
        Returns:
            set: All components that can be individually visualized for this environment
        """
        return {"env"}

    @property
    def action_spec(self):
        """
        Action specification should be implemented in subclasses.
        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    @property
    def action_dim(self):
        """
        Size of the action space
        Returns:
            int: Action space dimension
        """
        raise NotImplementedError
Example #27
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self,
                 env_name,
                 rand_seed,
                 maximum_length,
                 model_path,
                 frame_skip,
                 misc_info={}):
        self._env_name = env_name
        self._rand_seed = rand_seed
        self._maximum_length = maximum_length
        self._misc_info = misc_info

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()

        # Why is this here? Causes errors
        #observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        #assert not done
        #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        # annoying should replace
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed(self._rand_seed)

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(self._rand_seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def get_env_state(self):
        """
        Get full state of the environment beyond qpos and qvel
        For example, if targets are defined using sites, this function should also
        contain location of the sites (which are not included in qpos).
        Must return a dictionary that can be used in the set_env_state function
        """
        raise NotImplementedError

    def set_env_state(self, state):
        """
        Uses the state dictionary to set the state of the world
        """
        raise NotImplementedError

    # -----------------------------

    def reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob, 0, False, {}

    def reset_soft(self, seed=None):
        return self._old_obs, 0, False, {}

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='./tmp/',
                                   filename='newvid',
                                   it=0,
                                   camera_name=None):

        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o, *_ = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy(o)
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + str(it) + ".mp4"

            if not os.path.exists(save_loc):
                os.makedirs(save_loc)

            imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0)
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
Example #28
0
class Dribble_Env(object):
    def __init__(self):
        self.model = load_model_from_path("./xml/world5.xml") 
        self.sim = MjSim(self.model)
        # self.viewer = MyMjViewer(self.sim)
        self.viewer = MyMjViewer(self.sim)
        self.max_vel = [-1000,1000]
        self.x_motor = 0
        self.y_motor = 0
        self.date_time = datetime.datetime.now()
        self.path = "./datas/path_date" + str(self.date_time.strftime("_%Y%m%d_%H%M%S"))
        os.mkdir(self.path)

    def step(self,action):
        self.x_motor = ((action %3)-1) * 200
        self.y_motor = ((action//3)-1) * 200
        self.sim.data.ctrl[0] = self.x_motor 
        self.sim.data.ctrl[1] = self.y_motor
        self.sim.step()

    def get_state(self):
        robot_x, robot_y = self.sim.data.body_xpos[1][0:2]
        robot_xv, robot_yv = self.sim.data.qvel[0:2]
        ball_x, ball_y = self.sim.data.body_xpos[2][0:2]
        ball_xv, ball_yv = self.sim.data.qvel[2:4]
        ball_pos_local = -(robot_x - ball_x), -(robot_y - ball_y)
        object1_x, object1_y= self.sim.data.body_xpos[3][0:2]
        object2_x, object2_y= self.sim.data.body_xpos[4][0:2]
        object3_x, object3_y= self.sim.data.body_xpos[5][0:2]
        object4_x, object4_y= self.sim.data.body_xpos[6][0:2]
        closest_object_id = np.argmin([np.linalg.norm([object1_x - robot_x, object1_y - robot_y]),\
                                      np.linalg.norm([object2_x - robot_x, object2_y - robot_y]),\
                                      np.linalg.norm([object3_x - robot_x, object3_y - robot_y]),\
                                      np.linalg.norm([object4_x - robot_x, object4_y - robot_y])])
        
        object_local_x = -(robot_x - self.sim.data.body_xpos[closest_object_id+3][0])
        object_local_y = -(robot_y - self.sim.data.body_xpos[closest_object_id+3][1])

        return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \
                robot_xv, robot_yv,object_local_x,object_local_y,ball_x, ball_y,ball_xv,ball_yv]

    def check_done(self):
        ball_x ,ball_y = self.get_state()[8:10]
        if ball_x > 80 and -25 < ball_y < 25:
            return True
        else:
            return False

    def check_wall(self):
        ball_x, ball_y = self.get_state()[8:10]
        if math.fabs(ball_y) > 51:
            return True
        elif ball_x > 81 and math.fabs(ball_y) > 25:
            return True
        else:
            return False
        
    def check_avoidaince(self,object_num=4):
        for i in range(object_num):
            if math.fabs(self.sim.data.qvel[5+i*3]) > 0.1 or math.fabs(self.sim.data.qvel[6+3*i]) > 0.1:
                return True
        return False

    def reset(self):
        self.x_motor = 0
        self.y_motor = 0

        self.robot_x_data = []
        self.robot_y_data = []
        self.ball_x_data = []
        self.ball_y_data = []
        self.sim.reset()
        # self.sim.data.qpos[0] = np.random.randint(-3,3)
        self.sim.data.qpos[1] = np.random.randint(-5,5)

    def render(self):
        self.viewer.render()

    def plot_data(self,step,t,done,episode,flag,reward):
        self.field_x = [-90,-90,90,90,-90]
        self.field_y = [-60,60,60,-60,-60]
        self.robot_x_data.append(self.sim.data.body_xpos[1][0])
        self.robot_y_data.append(self.sim.data.body_xpos[1][1])
        self.ball_x_data.append(self.sim.data.body_xpos[2][0])
        self.ball_y_data.append(self.sim.data.body_xpos[2][1])

        datas = str(self.robot_x_data[-1])+" "+str(self.robot_y_data[-1])+" "+str(self.ball_x_data[-1])+" "+str(self.ball_y_data[-1])+" "+str(reward)
        with open(self.path + '/plotdata_' + str(episode+1).zfill(4)+ '.txt','a') as f:
            f.write(str(datas)+'\n')
        
        if (t >= step-1 or done) and flag:
            fig1 = plt.figure()
            plt.ion()
            plt.show()
            plt.plot(self.ball_x_data,self.ball_y_data,marker='o',markersize=2,color="red",label="ball")
            plt.plot(self.robot_x_data,self.robot_y_data,marker="o",markersize=2,color='blue',label="robot")
            plt.plot(self.sim.data.body_xpos[3][0],self.sim.data.body_xpos[3][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[4][0],self.sim.data.body_xpos[4][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[5][0],self.sim.data.body_xpos[5][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[6][0],self.sim.data.body_xpos[6][1],marker="o",markersize=8,color='black')
            plt.plot(self.field_x,self.field_y,markersize=1,color="black")
            plt.plot(80,0,marker="X",color="green",label="goal")
            plt.legend(loc="lower right")
            plt.draw()
            plt.pause(0.001)
            plt.close(1)
Example #29
0
class PushPuckBase(ABC):
    def __init__(self,
                 n_substeps: int = 1,
                 render: bool = False):
        self.render = render

        set_puck(raw_xml_path=self.raw_xml_path, xml_path=self.xml_path, puck_size=None, puck_pos=None)
        model = load_model_from_path(self.xml_path)

        self.sim = MjSim(model=model, nsubsteps=n_substeps)
        self.viewer = MjViewer(self.sim) if render else None

        self.reset()

    def __call__(self, weights, extra_timesteps=1000):
        self.reset()
        return self.rollout(weights, extra_timesteps)

    def reset(self) -> None:
        """Resets the environment (including the agent) to the initial conditions.
        """
        self.sim.reset()
        # Set initial position and velocity
        self.qpos = self.sim.data.qpos.copy()
        self.qpos[:self.robot_init_qpos.shape[0]] = self.robot_init_qpos
        qvel = np.zeros(self.sim.data.qvel.shape)
        mjSimState = MjSimState(time=0.0, qpos=self.qpos, qvel=qvel, act=None, udd_state={})
        self.sim.set_state(mjSimState)
        self.sim.forward()

    @property
    def xml_path(self) -> str:
        return str(Path(__file__).resolve().parents[0]) + '/' + 'assets/xml_model/env_model.xml'

    def set_target(self, target_pos) -> None:
        """
        Sets the postion of the target.
        :param target_pos: Desired target position
        """
        if target_pos is None:
            target_pos = [0.7, 0, 0.02]

        body_id = self.sim.model.body_name2id('target')
        self.sim.model.body_pos[body_id] = target_pos
        self.sim.forward()

    @property
    def target_pos(self):
        """
        Helper for getting the current target position.
        :return: Current target position
        """
        return self.sim.data.get_site_xpos('target:site1').copy()

    @property
    def puck_pos(self):
        """
        Helper for getting the current puck position.
        :return: Current puck position
        """
        return self.sim.data.get_body_xpos('puck').copy()

    @property
    def tcp_pos(self):
        """
        Helper for getting the current end effector position.
        :return: Current end effector position
        """
        return self.sim.data.get_body_xpos('tcp').copy()

    @property
    @abstractmethod
    def raw_xml_path(self):
        raise NotImplementedError

    @property
    @abstractmethod
    def robot_init_qpos(self):
        raise NotImplementedError

    @property
    @abstractmethod
    def robot_final_qpos(self):
        raise NotImplementedError

    @abstractmethod
    def rollout(self, weights, goal_pos, extra_timesteps=200):
        raise NotImplementedError
Example #30
0
class YuMi(gym.GoalEnv):

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 path,
                 dynamics,
                 task,
                 goal_env=False,
                 hertz=25,
                 render=True,
                 seed=0,
                 logging_level=logging.INFO):
        logging.basicConfig(level=logging_level)
        tf.set_random_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        self.goal_env = goal_env
        self.task = task

        self.path = path
        model = load_model_from_path(path)

        self.joint_idx = []
        for name in sorted(model.joint_names):
            if 'yumi' not in name:
                continue
            self.joint_idx.append(model.joint_name2id(name))

        self.joint_states_pos = None
        self.joint_states_vel = None
        self.previous_action = np.zeros(14)
        self.target_hist = deque(maxlen=2)
        self.hertz = hertz
        self.steps = 0
        self.should_render = render
        self.steps_per_action = int(1.0 / hertz / model.opt.timestep)
        model.nuserdata = 14
        self.viewer = None
        self.model = model

        self._set_joint_limits()

        self.generate_dynamics = dynamics

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, model):
        self._model = model
        self.sim = MjSim(model, udd_callback=self.callback)
        self.data = self.sim.data
        if self.should_render and not self.viewer:
            self.viewer = MjViewer(self.sim)
            self.viewer.cam.azimuth = 180
            self.viewer.cam.elevation = -15
            self.viewer._hide_overlay = True

    @property
    def horizon(self):
        return int(self.hertz * MAX_TIME)

    def get_horizon(self):
        return self.horizon

    @property
    def action_space(self):
        return spaces.Box(low=-0.1, high=0.1, shape=(14, ), dtype=np.float32)

    @property
    def observation_space(self):
        d = spaces.Dict({
            'observation':
            spaces.Box(low=-np.inf,
                       high=np.inf,
                       shape=(91, ),
                       dtype=np.float32),
            'desired_goal':
            spaces.Box(low=-np.inf, high=np.inf, shape=(6, ),
                       dtype=np.float32),
            'achieved_goal':
            spaces.Box(low=-np.inf, high=np.inf, shape=(6, ), dtype=np.float32)
        })
        if self.goal_env:
            return d
        else:
            return d['observation']

    def get_step(self):
        return self.steps

    def simstep(self):
        try:
            if self.sim.nsubsteps == self.steps_per_action:
                terminal = self.sim.step()
            else:
                # TODO: randomize substeps
                for step in range(self.steps_per_action):
                    terminal = self.sim.step()
                    if terminal:
                        break

        except Exception as e:
            print('Caught exception: ', e)
            with open('exception.txt', 'w') as f:
                f.write(str(e))
            terminal = True

        self.render()
        return terminal

    def callback(self, state):
        if self.joint_states_pos is None:
            return

        model = state.model
        nu = model.nu
        data = state.data

        dt = 1 / self.hertz

        if VELOCITY_CONTROLLER:
            action = np.empty(nu)
            action[:] = data.userdata[:nu]
            ctrl = action - self.joint_states_vel
            data.qacc[self.joint_idx] = ctrl * 1.0 / dt
        else:
            action = np.empty(model.nu)
            for i in range(len(self.joint_idx)):
                pid = self.pids[i]
                qpos = self.joint_states_pos[i]
                action[i] = pid(qpos, data.time)
            res = action
            res -= self.joint_states_vel * dt
            data.qacc[self.joint_idx] = (1.0 / dt**2) * res

        functions.mj_inverse(model, data)
        data.qfrc_applied[self.joint_idx] = data.qfrc_inverse[self.joint_idx]

    def reset(self):

        self.steps = 0
        self.joint_states_pos = None
        self.joint_states_vel = None
        self.data.qacc[:] = 0
        self.data.qvel[:] = 0

        self.pids = []
        for i in range(self.model.nu):
            pid = PID(Kp=1.0, Kd=0.001, sample_time=0)
            pid.output_limits = (self.action_space.low[i],
                                 self.action_space.high[i])
            self.pids.append(pid)

        _ = self.randomize_dynamics(self.model)
        self.sim.reset()

        self.set_initial_pose()
        self.sim.forward()

        obs = self.get_observations()
        return obs

    def set_initial_pose(self):
        self.data.set_joint_qpos('yumi_joint_1_l', 1.)
        self.data.set_joint_qpos('yumi_joint_1_r', -1.)
        self.data.set_joint_qpos('yumi_joint_2_l', 0.1)
        self.data.set_joint_qpos('yumi_joint_2_r', 0.1)
        ''' randomize goal
        goal_id = self.model.body_name2id('goal')
        pos = self.model.body_pos[goal_id]
        pos[0:2] = [0.5, 0]
        pos[0:2] += np.random.uniform(-0.05, 0.05, size=(2,))
        self.model.body_pos[goal_id] = pos
        '''

        self.sim.forward()
        target_start, target_end = self.model.get_joint_qpos_addr('target')
        target_qpos = self.data.qpos[target_start:target_end]

        target_quat = target_qpos[3:]

        if self.task == 0:
            # rotate y=-90 deg
            quat = rotations.euler2quat(np.array([0, -np.pi / 2, 0]))
            z_idx = 0
        elif self.task == 1:
            # rotate x=90 deg
            quat = rotations.euler2quat(np.array([np.pi / 2, 0, 0]))
            z_idx = 1
        elif self.task == 2:
            # do nothing
            quat = rotations.euler2quat(np.array([0, 0, 0]))
            z_idx = 2
        elif self.task == 3:
            # rotate z=-90 deg
            quat = rotations.euler2quat(np.array([0, 0, -np.pi / 2]))
            z_idx = 2
        else:
            raise Exception('Additional tasks not implemented.')

        target_id = self.model.geom_name2id('target')
        target_qpos[0] = 0.5 + np.random.uniform(-0.01, 0.01)
        target_qpos[1] = np.random.uniform(0.14, 0.15)
        height = self.model.geom_size[target_id][z_idx]
        target_qpos[2] = 0.051 + height

        goal_id = self.model.body_name2id('goal')
        body_pos = self.model.body_pos[goal_id]
        body_quat = self.model.body_quat[goal_id]
        body_pos[2] = target_qpos[2]
        body_quat[:] = quat

        perturbation = np.zeros(3)
        perturbation[z_idx] = np.random.uniform(-0.2, 0.2)
        euler = rotations.quat2euler(quat)
        euler = rotations.subtract_euler(euler, perturbation)
        target_qpos[3:] = rotations.euler2quat(euler)

    def quat2mat(self, quat):
        result = np.empty(9, dtype=np.double)
        functions.mju_quat2Mat(result, np.asarray(quat))
        return result

    def mat2quat(self, mat):
        result = np.empty(4, dtype=np.double)
        functions.mju_mat2Quat(result, np.asarray(mat))
        return result

    def get_observations(self):
        """
        This functions looks a bit awkward since it is trying to replicate
        the order of observations when listening to transforms using ROS TF.
        """
        obs = []
        observations = {}

        pos = []
        vel = []

        model = self.model
        data = self.data

        # Previous policy in ROS used translations between yumi_base_link
        # and requested frames.
        # We subtract the xpos of yumi_base_link to mimic that behavior.
        base_link_xpos = data.get_body_xpos('yumi_base_link')

        # TODO Look at this
        for i, joint in enumerate(self.joint_idx):
            pos.append(data.qpos[joint])
            vel.append(data.qvel[joint])
            name = model.joint_names[joint]
            assert model.get_joint_qpos_addr(name) == joint
        obs.extend(pos)
        obs.extend(vel)

        self.joint_states_pos = np.array(pos) + np.random.normal(
            0, 0.001, size=len(pos))
        self.joint_states_vel = np.array(vel) + np.random.normal(
            0, 0.001, size=len(vel))

        name = 'site:goal'
        pos = data.get_site_xpos(name)
        pos -= base_link_xpos
        mat = data.get_site_xmat(name)
        mat = mat.reshape(-1)
        obs.extend(pos)
        obs.extend(mat)

        names = ['left_gripper_base', 'right_gripper_base']
        for name in names:
            pos = data.get_body_xpos(name)
            pos -= base_link_xpos
            quat = data.get_body_xquat(name)
            mat = self.quat2mat(quat)
            obs.extend(pos)
            obs.extend(mat)

        name = 'site:target'
        pos = data.get_site_xpos(name)
        pos -= base_link_xpos
        mat = data.get_site_xmat(name)
        mat = mat.reshape(-1)

        observations['desired_goal'] = self.get_desired_goal()
        observations['achieved_goal'] = self.get_achieved_goal()

        while len(self.target_hist) < self.target_hist.maxlen:
            self.target_hist.append(np.hstack([pos, mat]))
        self.target_hist.append(np.hstack([pos, mat]))

        obs.extend(self.target_hist[-1])
        obs.extend(
            (self.target_hist[-1] - self.target_hist[-2]) / (1 / self.hertz))

        target_id = model.geom_name2id('target')
        size = model.geom_size[target_id]
        obs.extend(size)

        obs = np.array(obs)
        observations['observation'] = obs
        if self.goal_env:
            return observations
        else:
            return obs

    def render(self):
        if self.should_render:
            if False and self.sim.nsubsteps == self.steps_per_action and self.steps % 10 != 0:
                return

            self.viewer.render()

    def step(self, action):
        action = .1 * action

        self.steps += 1

        reward = 0
        terminal = False
        # Check for early termination
        terminal, force_penalty = self.bad_collision()
        if terminal:
            reward = -10

        # Eventhough limits are specified in action_space, they
        # are not honored by baselines so we clip them
        action = np.clip(action, self.action_space.low, self.action_space.high)

        idx = 0
        if self.joint_states_pos[idx] > 1.2:
            action[0] = min(action[idx], 0)
        elif self.joint_states_pos[0] < 0.8:
            action[0] = max(action[0], 0)

        idx = 1
        if self.joint_states_pos[idx] < -1.2:
            action[idx] = max(action[idx], 0)
        elif self.joint_states_pos[idx] > -0.8:
            action[idx] = min(action[idx], 0)

        for idx in [2, 3, 4, 5]:
            if self.joint_states_pos[idx] > 0.4:
                action[idx] = min(action[idx], 0)
            if self.joint_states_pos[idx] < -0.3:
                action[idx] = max(action[idx], 0)

        idx = -2
        if self.joint_states_pos[idx] > 0.2:
            action[idx] = min(action[idx], 0)
        elif self.joint_states_pos[idx] < -0.4:
            action[idx] = max(action[idx], 0)

        idx = -1
        if self.joint_states_pos[idx] < -0.2:
            action[idx] = max(action[idx], 0)
        elif self.joint_states_pos[idx] > 0.4:
            action[idx] = min(action[idx], 0)

        if VELOCITY_CONTROLLER:
            action += self.joint_states_vel
        else:
            action += self.joint_states_pos

        for i, a in enumerate(action):
            pid = self.pids[i]
            pid.setpoint = a

        # Perturbate action
        #action += np.random.normal(0, 0.001, size=len(action))

        stop_early = self.simstep()
        terminal = terminal or stop_early
        obs = self.get_observations()

        if not terminal:
            reward = self.compute_reward(self.get_achieved_goal(),
                                         self.get_desired_goal(), {})
            #reward -= force_penalty
            #logger.debug('force penalty: %f' % force_penalty)
            terminal = self.steps == self.horizon

        if self.steps % self.hertz == 0:
            logging.info('Step: {}, reward: {}'.format(self.steps, reward))
            if 0.8 < reward and self.steps == self.horizon:
                logging.info('**** LOOKING GOOD ****')

        return obs, reward, terminal, {}

    def get_distance(self, a, b):
        return np.linalg.norm(a - b, axis=-1)

    def get_goal_distance(self, achieved_goal, desired_goal):
        pos1, pos2 = achieved_goal[:], desired_goal[:]
        pos_distance = self.get_distance(pos1, pos2)
        return pos_distance

    def compute_reward(self, achieved_goal, desired_goal, info):
        pos_distance = self.get_goal_distance(achieved_goal, desired_goal)
        pos_reward = self.get_pos_reward(pos_distance)

        euler1, euler2 = achieved_goal[3:], desired_goal[3:]
        ang_distance = np.linalg.norm(rotations.subtract_euler(euler1, euler2),
                                      axis=-1)
        ang_distance_ratio = 0.5
        ang_distance_penalty = ang_distance_ratio * ang_distance

        reward = pos_reward - ang_distance_penalty

        if self.steps % 10 == 0:
            logging.debug('Reward: %f, pos reward: %f, ang penalty: %f' %
                          (reward, pos_reward, ang_distance_penalty))
        return reward

    def get_pos_reward(self, distance, close=0.01, margin=0.2):
        return max(0, 1 - distance / margin)

    def _set_joint_limits(self):
        xml_str = self.model.get_xml()
        tree = EE.fromstring(xml_str)
        low, high = [], []
        for name in sorted(self.model.joint_names):
            if 'yumi' not in name:
                continue
            limit = tree.find(".//joint[@name='{}']".format(name))
            limit_range = [float(x) for x in limit.get('range').split(' ')]
            low.append(limit_range[0])
            high.append(limit_range[1])
        self.joint_limits = spaces.Box(low=np.array(low),
                                       high=np.array(high),
                                       dtype=np.float32)

    def bad_collision(self):
        bad_collision = False
        force_penalty = 0

        for i in range(self.data.ncon):
            contact = self.data.contact[i]
            geom1 = contact.geom1
            geom2 = contact.geom2
            bodyid1 = self.model.geom_bodyid[geom1]
            bodyid2 = self.model.geom_bodyid[geom2]

            bodyname1 = self.model.body_id2name(bodyid1)
            bodyname2 = self.model.body_id2name(bodyid2)

            is_target = 'target' in bodyname1 or 'target' in bodyname2
            is_target = is_target or 'box-composite' in bodyname1 or 'box-composite' in bodyname2
            is_table = 'table' in bodyname1 or 'table' in bodyname2

            if is_target and is_table:
                continue
            elif is_target:
                continue
                sim = self.sim
                body1_cfrc = sim.data.cfrc_ext[bodyid1]
                body1_contact_force_norm = np.sqrt(
                    np.sum(np.square(body1_cfrc)))
                body2_cfrc = sim.data.cfrc_ext[bodyid2]
                body2_contact_force_norm = np.sqrt(
                    np.sum(np.square(body2_cfrc)))

                force_penalty = body1_contact_force_norm + body2_contact_force_norm
            else:
                bad_collision = True
                if bad_collision:
                    print('body is: ', bodyname1, bodyname2)
                    break

        return bad_collision, force_penalty

    def get_dynamics(self):
        return self.dynamics

    def randomize_dynamics(self, model):
        self.dynamics = self.generate_dynamics()
        fri, icom = self.dynamics

        try:
            id = model.body_name2id('insidebox')
            model.body_pos[id][-1] = icom

            for pair in range(model.npair):
                tableid = model.geom_name2id('table')
                targetid = model.geom_name2id('target')
                if ((model.pair_geom1 == tableid
                     and model.pair_geom2 == targetid)
                        or (model.pair_geom2 == tableid
                            and model.pair_geom1 == targetid)):

                    pair_friction = model.pair_friction[pair]
                    pair_friction[:2] = [fri, fri]

            logging.debug('Dynamics: {}'.format(self.dynamics))
        except:
            pass

        self.sim.forward()
        return model

    def get_desired_goal(self):
        return self.get_site_pose('site:goal')

    def get_achieved_goal(self):
        return self.get_site_pose('site:target')

    def get_site_pose(self, site):
        xpos = self.data.get_site_xpos(site)
        euler = rotations.mat2euler(self.data.get_site_xmat(site))
        return np.hstack([xpos, euler])

    def screenshot(self, image_path='image.png'):
        import imageio
        self.viewer._hide_overlay = True
        self.viewer.render()
        width, height = 2560, 1440
        img = self.viewer.read_pixels(width, height, depth=False)
        # original image is upside-down, so flip it
        img = img[::-1, :, :]
        imageio.imwrite(image_path, img)
Example #31
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    def evaluate_success(self, paths, logger=None):
        """
        Log various success metrics calculated based on input paths into the logger
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640,480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1],
                                             mode='offscreen', camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Example #32
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self, model_path, frame_skip):
        print(model_path)
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.timestep = self.model.opt.timestep * self.frame_skip
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None

        # changed a place of setting action space
        # sometimes use self.action_space at first call self._step()
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized and after every reset
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        ob = self.reset_model()
        if self.viewer is not None:
            # TODO:
            # self.viewer.autoscale()
            self.viewer_setup()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        sim_state = self.sim.get_state()
        sim_state.qpos[:] = qpos[:]
        sim_state.qvel[:] = qvel[:]
        self.sim.set_state(sim_state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        assert len(ctrl) is len(self.sim.data.ctrl)
        self.data.ctrl[:] = ctrl[:]
        for _ in range(n_frames):
            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer = None
            return

        if mode == 'rgb_array':
            data = self._get_viewer()._read_pixels_as_in_window()
            return data
        elif mode == 'human':
            self._get_viewer().render()
            return

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def get_body_com(self, body_name):
        return self.sim.data.get_body_xipos(body_name)

    def get_body_comvel(self, body_name):
        return self.sim.data.get_body_cvel(self.model.body_name2id(body_name))

    def get_body_xmat(self, body_name):
        return self.sim.data.get_body_xmat(body_name).reshape((3, 3))

    def state_vector(self):
        return np.concatenate([
            self.data.qpos.flat,
            self.data.qvel.flat
        ])
Example #33
0
class WindySlope(gym.Env):
    def __init__(self,
                 model,
                 mode,
                 hertz=25,
                 should_render=True,
                 should_screenshot=False):
        self.hertz = hertz
        self.steps = 0
        self.should_render = should_render
        self.should_screenshot = should_screenshot
        self.nsubsteps = int(MAX_TIME / model.opt.timestep / 100)
        self.viewer = None
        self.model = model
        self.mode = mode
        self.enabled = True
        self.metadata = {'render.modes': 'rgb_array'}
        self.should_record = False

    def close(self):
        pass

    @property
    def observation_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(18, ))

    @property
    def action_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(0, ))

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, model):
        self._model = model
        self.sim = MjSim(model)
        self.data = self.sim.data

        if self.should_render:
            if self.viewer:
                self.viewer.sim = sim
                return
            self.viewer = MjViewer(self.sim)
            self.viewer.cam.azimuth = 45
            self.viewer.cam.elevation = -20
            self.viewer.cam.distance = 25
            self.viewer.cam.lookat[:] = [0, 0, -2]
            self.viewer.scn.flags[3] = 0

    def reset(self):
        self.sim.reset()
        self.steps = 0

        self.sim.forward()

        obs = self.get_observations(self.model, self.data)
        return obs

    def get_observations(self, model, data):
        self.sim.forward()
        obs = []
        name = 'box'
        pos = data.get_body_xpos(name)
        xmat = data.get_body_xmat(name).reshape(-1)
        velp = data.get_body_xvelp(name)
        velr = data.get_body_xvelr(name)

        for x in [pos, xmat, velp, velr]:
            obs.extend(x.copy())

        obs = np.array(obs, dtype=np.float32)
        return obs

    def screenshot(self, image_path):
        self.viewer.hide_overlay = True
        self.viewer.render()
        width, height = 2560, 1440
        #width, height = 1,1
        img = self.viewer.read_pixels(width, height, depth=False)
        # original image is upside-down, so flip it
        img = img[::-1, :, :]
        imageio.imwrite(image_path, img)

    def step(self, action):
        nsubsteps = self.nsubsteps
        for _ in range(nsubsteps):
            self.sim.step()
            self.render()
        self.steps += 1

        return self.get_observations(self.model,
                                     self.data), 1, self.steps == 100, {}

    def render(self, mode=None):
        if self.should_render:
            self.viewer._overlay.clear()

            def nothing():
                return

            self.viewer._create_full_overlay = nothing
            wind = model.opt.wind[0]
            self.viewer.add_overlay(1, "Wind", "{:.2f}".format(wind))
            self.viewer.render()
            if self.should_record:
                width, height = 2560, 1440
                img = self.viewer.read_pixels(width, height, depth=False)
                # original image is upside-down, so flip it
                img = img[::-1, :, :]
                return img

    def euler2quat(self, euler):
        euler = np.asarray(euler, dtype=np.float64)
        assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

        ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
        si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
        ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
        cc, cs = ci * ck, ci * sk
        sc, ss = si * ck, si * sk

        quat = np.empty(euler.shape[:-1] + (4, ), dtype=np.float64)
        quat[..., 0] = cj * cc + sj * ss
        quat[..., 3] = cj * sc - sj * cs
        quat[..., 2] = -(cj * ss + sj * cc)
        quat[..., 1] = cj * cs - sj * sc
        return quat

    def degrees2radians(self, degrees):
        return degrees * np.pi / 180
Example #34
0
class DeepMimicEnv(object):
    def __init__(self):
        file_path = '/home/mingfei/Documents/DeepMimic/mujoco/humanoid_deepmimic/envs/asset/dp_env_v1.xml'
        with open(file_path, 'r') as fin:
            MODEL_XML = fin.read()

        self.model = load_model_from_xml(MODEL_XML)
        self.sim = MjSim(self.model)
        self.viewer = MjViewer(self.sim)

        mocap_filepath = '/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_backflip.txt'

        self.mocap = MocapDM()
        self.mocap.load_mocap(mocap_filepath)

        self.interface = MujocoInterface()
        self.interface.init(self.sim, self.mocap.dt)

        total_length = 1  # phase variable
        total_length += self.mocap.num_bodies * (self.mocap.pos_dim +
                                                 self.mocap.rot_dim) + 1
        total_length += self.mocap.num_bodies * (self.mocap.pos_dim +
                                                 self.mocap.rot_dim - 1)

        self.state_size = total_length
        self.action_size = self.interface.action_size

    def update(self, timestep):
        self.sim.step()

    def reset(self):
        self.sim.reset()

    def get_time(self):
        return self.sim.data.time

    def get_name(self):
        return 'test'

    # rendering and UI interface
    def draw(self):
        self.viewer.render()

    def keyboard(self, key, x, y):
        pass

    def mouse_click(self, button, state, x, y):
        pass

    def mouse_move(self, x, y):
        pass

    def reshape(self, w, h):
        pass

    def shutdown(self):
        pass

    def is_done(self):
        return False

    def set_playback_speed(self, speed):
        pass

    def set_updates_per_sec(self, updates_per_sec):
        pass

    def get_win_width(self):
        return 640

    def get_win_height(self):
        return 320

    def get_num_update_substeps(self):
        return 32

    # rl interface
    def is_rl_scene(self):
        return True

    def get_num_agents(self):
        return 1

    def need_new_action(self, agent_id):
        return True

    def record_state(self, agent_id):
        self.data = self.sim.data
        # Cartesian position of body frame
        xpos = self.data.body_xpos
        xquat = self.data.body_xquat
        cvel = self.data.cvel

        valid_xpos = self.interface.align_state(xpos)
        valid_xquat = self.interface.align_state(xquat)
        valid_cvel = self.interface.align_state(cvel)

        root_xpos = valid_xpos[0]

        state = np.zeros(self.state_size)
        state.fill(np.nan)  # fill with nan to avoid any missing data

        curr_idx = 0
        state[curr_idx] = 0
        curr_idx += 1

        state[curr_idx] = root_xpos[1]
        curr_idx += 1

        for i in range(self.mocap.num_bodies):
            state[curr_idx:curr_idx + 3] = valid_xpos[i] - root_xpos
            curr_idx += 3
            state[curr_idx:curr_idx + 4] = valid_xquat[i]
            curr_idx += 4

        for i in range(self.mocap.num_bodies):
            state[curr_idx:curr_idx + 6] = valid_cvel[i]
            curr_idx += 6

        return state

    def record_goal(self, agent_id):
        return np.array([1])

    def get_action_space(self, agent_id):
        return 1

    def set_action(self, agent_id, action):
        torque = self.interface.action2torque(action)
        self.sim.data.ctrl[:] = torque[:]
        return

    def get_state_size(self, agent_id):
        return self.state_size

    def get_goal_size(self, agent_id):
        return 0

    def get_action_size(self, agent_id):
        return self.action_size

    def get_num_actions(self, agent_id):
        return

    def build_state_offset(self, agent_id):
        return np.zeros(self.get_state_size(agent_id))

    def build_state_scale(self, agent_id):
        return np.ones(self.get_state_size(agent_id))

    def build_goal_offset(self, agent_id):
        # return np.zeros(1)
        return np.array([])

    def build_goal_scale(self, agent_id):
        # return np.ones(1)
        return np.array([])

    def build_action_offset(self, agent_id):
        return np.zeros(self.get_action_size(agent_id))

    def build_action_scale(self, agent_id):
        return np.ones(self.get_action_size(agent_id))

    def build_action_bound_min(self, agent_id):
        return -10 * np.ones(self.get_action_size(agent_id))

    def build_action_bound_max(self, agent_id):
        return 10 * np.ones(self.get_action_size(agent_id))

    def build_state_norm_groups(self, agent_id):
        tmp = np.zeros(self.get_state_size(agent_id))
        tmp[-1] = 1
        return tmp

    def build_goal_norm_groups(self, agent_id):
        # return np.ones(1)
        return np.array([])

    def calc_reward(self, agent_id):
        # TODO
        return np.random.rand() - 0.5

    def is_episode_end(self):
        return False

    def check_terminate(self, agent_id):
        return 2

    def check_valid_episode(self):
        return True

    def log_val(self, agent_id, val):
        pass

    def set_sample_count(self, count):
        pass

    def set_mode(self, mode):
        pass
Example #35
0
class JuggleEnv:
    def __init__(self):
        self.control_freq: float = 50.0
        self.control_timestep: float = 1.0 / self.control_freq
        self.viewer = None
        self.horizon = 1000
        self.target = np.array([0.8, 0.0, 1.9])

        # load model
        self.robot: Robot = None
        self.arena: Arena = None
        self.pingpong: MujocoGeneratedObject = None
        self.model: MujocoWorldBase = None
        self._load_model()

        # initialize simulation
        self.mjpy_model = None
        self.sim: MjSim = None
        self.model_timestep: float = 0.0
        self._initialize_sim()

        # reset robot, object and internel variables
        self.cur_time: float = 0.0
        self.timestep: int = 0.0
        self.done: bool = False
        self._pingpong_body_id: int = -1
        self._paddle_body_id: int = -1
        self._reset_internel()

        # internel variable for scoring
        self._below_plane = False
        self.plane_height = 1.5

    def _load_model(self):
        # Load the desired controller's default config as a dict
        controller_config = load_controller_config(
            default_controller="JOINT_VELOCITY")
        controller_config["output_max"] = 1.0
        controller_config["output_min"] = -1.0
        robot_noise = {"magnitude": [0.05] * 7, "type": "gaussian"}
        self.robot = SingleArm(
            robot_type="IIWA",
            idn=0,
            controller_config=controller_config,
            initial_qpos=[0.0, 0.7, 0.0, -1.4, 0.0, -0.56, 0.0],
            initialization_noise=robot_noise,
            gripper_type="PaddleGripper",
            gripper_visualization=True,
            control_freq=self.control_freq)
        self.robot.load_model()
        self.robot.robot_model.set_base_xpos([0, 0, 0])

        self.arena = EmptyArena()
        self.arena.set_origin([0.8, 0, 0])

        self.pingpong = BallObject(name="pingpong",
                                   size=[0.02],
                                   rgba=[0.8, 0.8, 0, 1],
                                   solref=[0.1, 0.03],
                                   solimp=[0, 0, 1],
                                   density=100)
        pingpong_model = self.pingpong.get_collision()
        pingpong_model.append(
            new_joint(name="pingpong_free_joint", type="free"))
        pingpong_model.set("pos", "0.8 0 2.0")

        # merge into one
        self.model = MujocoWorldBase()
        self.model.merge(self.robot.robot_model)
        self.model.merge(self.arena)
        self.model.worldbody.append(pingpong_model)

    def _initialize_sim(self):
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.step()
        self.robot.reset_sim(self.sim)
        self.model_timestep = self.sim.model.opt.timestep

    def _reset_internel(self):
        # reset robot
        self.robot.setup_references()
        self.robot.reset(deterministic=False)

        # reset pingpong
        pingpong_pos = self.target + np.random.rand(3) * 0.08 - 0.04
        pingpong_quat = np.array([1.0, 0.0, 0.0, 0.0])
        self.sim.data.set_joint_qpos(
            "pingpong_free_joint",
            np.concatenate([pingpong_pos, pingpong_quat]))

        # get handle for important parts
        self._pingpong_body_id = self.sim.model.body_name2id("pingpong")
        self._paddle_body_id = self.sim.model.body_name2id(
            "gripper0_paddle_body")

        # Setup sim time based on control frequency
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def reset(self):
        self.sim.reset()
        self._reset_internel()
        self.sim.forward()
        return self._get_observation()

    def _get_observation(self):
        di = OrderedDict()

        # get robot observation
        di = self.robot.get_observations(di)

        # get pingpong observation
        pingpong_pos = np.array(
            self.sim.data.body_xpos[self._pingpong_body_id])
        di["pingpong_pos"] = pingpong_pos
        return di

    def step(self, action: np.ndarray):
        if self.done:
            raise ValueError("executing action in terminated episode")

        policy_step = True
        score = 0.0
        for _ in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self.robot.control(action=action, policy_step=policy_step)
            # self.sim.data.ctrl[:] = action*5.0
            self.sim.step()
            policy_step = False
            # check if the ball pass the plane
            h = self.sim.data.body_xpos[self._pingpong_body_id][2]
            self._below_plane |= h < self.plane_height
            if self._below_plane and h > self.plane_height:
                score = 1.0
                self._below_plane = False

        self.timestep += 1
        self.cur_time += self.control_timestep
        observation = self._get_observation()
        dist_xy = np.linalg.norm(
            (observation["robot0_eef_pos"] - observation["pingpong_pos"])[:2])
        # paddle_height = observation["robot0_eef_pos"][2]
        self.done = self.timestep >= self.horizon or dist_xy > 0.2
        reward = score  # + 0 * (0.2 - dist_xy)
        return observation, reward, self.done, {}

    def render(self, mode="human"):
        if mode == "human":
            self._get_viewer().render()
        elif mode == "rgb_array":
            img = self.sim.render(1920, 1080)
            return img[::-1, :, ::-1]

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer.vopt.geomgroup[0] = 0
            self.viewer._hide_overlay = True
        return self.viewer

    def close(self):
        self._destroy_viewer()

    def _destroy_viewer(self):
        if self.viewer is not None:
            glfw.destroy_window(self.viewer.window)
            self.viewer = None

    def seed(self):
        pass
Example #36
0
class BaseEnv:
    def __init__(
        self,
        robot_folders,
        robot_dir,
        substeps,
        tol=0.02,
        train=True,
        with_kin=None,
        with_dyn=None,
        multi_goal=False,
    ):
        self.with_kin = with_kin
        self.with_dyn = with_dyn
        self.multi_goal = multi_goal
        self.goal_dim = 3

        if self.with_dyn:
            norm_file = os.path.join(robot_dir, 'stats/dyn_stats.json')
            with open(norm_file, 'r') as f:
                stats = json.load(f)
            self.dyn_mu = np.array(stats['mu']).reshape(-1)
            self.dyn_sigma = np.array(stats['sigma']).reshape(-1)
            self.dyn_min = np.array(stats['min']).reshape(-1)
            self.dyn_max = np.array(stats['max']).reshape(-1)

        self.nsubsteps = substeps
        self.metadata = {}
        self.reward_range = (-np.inf, np.inf)
        self.spec = None
        self.dist_tol = tol
        self.viewer = None

        self.links = [
            'gl0', 'gl1_1', 'gl1_2', 'gl2', 'gl3_1', 'gl3_2', 'gl4', 'gl5_1',
            'gl5_2', 'gl6'
        ]
        self.bodies = [
            "connector_plate_base", "electric_gripper_base",
            "gripper_l_finger", "gripper_l_finger_tip", "gripper_r_finger",
            "gripper_r_finger_tip", "l0", "l1", "l2", "l3", "l4", "l5", "l6"
        ]
        self.site_set = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6']
        self.joint_set = self.site_set

        self.robots = []
        for folder in robot_folders:
            self.robots.append(os.path.join(robot_dir, folder))

        self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)}
        self.robot_num = len(self.robots)

        if train:
            self.test_robot_num = min(100, self.robot_num)
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(
                range(self.train_robot_num, self.robot_num))
            self.train_test_robot_num = min(100, self.train_robot_num)
            self.train_test_robot_ids = list(range(self.train_test_robot_num))
            self.train_test_conditions = self.train_test_robot_num
        else:
            self.test_robot_num = self.robot_num
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(range(self.robot_num))

        self.test_conditions = self.test_robot_num

        print('Train robots: ', self.train_robot_num)
        print('Test robots: ', self.test_robot_num)
        print('Multi goal:', self.multi_goal)
        self.reset_robot(0)

        self.ob_dim = self.get_obs()[0].size
        print('Ob dim:', self.ob_dim)

        high = np.inf * np.ones(self.ob_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.ep_reward = 0
        self.ep_len = 0

    def reset(self, robot_id=None):
        raise NotImplementedError

    def step(self, action):
        raise NotImplementedError

    def update_action_space(self):
        actuators = self.sim.model._actuator_name2id.keys()
        valid_joints = [ac for ac in actuators if ac in self.joint_set]
        self.act_dim = len(valid_joints)
        bounds = self.sim.model.actuator_ctrlrange[:self.act_dim]
        self.ctrl_low = np.copy(bounds[:, 0])
        self.ctrl_high = np.copy(bounds[:, 1])
        self.action_space = spaces.Box(self.ctrl_low,
                                       self.ctrl_high,
                                       dtype=np.float32)

    def scale_action(self, action):
        act_k = (self.action_space.high - self.action_space.low) / 2.
        act_b = (self.action_space.high + self.action_space.low) / 2.
        return act_k * action + act_b

    def reset_robot(self, robot_id):
        self.robot_folder_id = self.dir2id[self.robots[robot_id]]
        robot_file = os.path.join(self.robots[robot_id], 'robot.xml')
        self.model = load_model_from_path(robot_file)
        self.sim = MjSim(self.model, nsubsteps=self.nsubsteps)
        self.update_action_space()
        self.sim.reset()
        self.sim.step()
        if self.viewer is not None:
            self.viewer = MjViewer(self.sim)

    def test_reset(self, cond):
        robot_id = self.test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def train_test_reset(self, cond):
        robot_id = self.train_test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def cal_reward(self, s, goal, a):
        dist = np.linalg.norm(s - goal)

        if dist < self.dist_tol:
            done = True
            reward_dist = 1
        else:
            done = False
            reward_dist = -1
        reward = reward_dist
        reward -= 0.1 * np.square(a).sum()
        return reward, dist, done

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()

    def get_obs(self):
        qpos = self.get_qpos(self.sim)
        qvel = self.get_qvel(self.sim)

        ob = np.concatenate([qpos, qvel])
        if self.with_kin:
            link_rela = self.get_xpos_xrot(self.sim)
            ob = np.concatenate([ob, link_rela])
        if self.with_dyn:
            body_mass = self.get_body_mass(self.sim)
            joint_friction = self.get_friction(self.sim)
            joint_damping = self.get_damping(self.sim)
            armature = self.get_armature(self.sim)
            dyn_vec = np.concatenate(
                (body_mass, joint_friction, joint_damping, armature))
            dyn_vec = np.divide((dyn_vec - self.dyn_min),
                                self.dyn_max - self.dyn_min)
            ob = np.concatenate([ob, dyn_vec])
        target_pos = self.sim.data.get_site_xpos('target').flatten()
        ob = np.concatenate([ob, target_pos])
        achieved_goal = self.sim.data.get_site_xpos('ref_pt')
        return ob, achieved_goal

    def get_link_length(self, sim):
        link_length = []
        for link in self.links:
            geom_id = sim.model._geom_name2id[link]
            link_length.append(sim.model.geom_size[geom_id][1])
        return np.asarray(link_length).reshape(-1)

    def get_qpos(self, sim):
        angle_noise_range = 0.02
        qpos = sim.data.qpos[:self.act_dim]
        qpos += np.random.uniform(-angle_noise_range, angle_noise_range,
                                  self.act_dim)
        qpos = np.pad(qpos, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qpos.reshape(-1)

    def get_qvel(self, sim):
        velocity_noise_range = 0.02
        qvel = sim.data.qvel[:self.act_dim]
        qvel += np.random.uniform(-velocity_noise_range, velocity_noise_range,
                                  self.act_dim)
        qvel = np.pad(qvel, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qvel.reshape(-1)

    def get_xpos_xrot(self, sim):
        xpos = []
        xrot = []
        for joint_id in range(self.act_dim):
            joint = sim.model._actuator_id2name[joint_id]
            if joint == 'j0':
                pos1 = sim.data.get_body_xpos('base_link')
                mat1 = sim.data.get_body_xmat('base_link')
            else:
                prev_id = joint_id - 1
                prev_joint = sim.model._actuator_id2name[prev_id]
                pos1 = sim.data.get_site_xpos(prev_joint)
                mat1 = sim.data.get_site_xmat(prev_joint)
            pos2 = sim.data.get_site_xpos(joint)
            mat2 = sim.data.get_site_xmat(joint)
            relative_pos = pos2 - pos1
            rot_euler = self.relative_rotation(mat1, mat2)
            xpos.append(relative_pos)
            xrot.append(rot_euler)
        xpos = np.array(xpos).flatten()
        xrot = np.array(xrot).flatten()
        xpos = np.pad(xpos, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        xrot = np.pad(xrot, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        ref_pt_xpos = self.sim.data.get_site_xpos('ref_pt')
        ref_pt_xmat = self.sim.data.get_site_xmat('ref_pt')
        relative_pos = ref_pt_xpos - pos2
        rot_euler = self.relative_rotation(mat2, ref_pt_xmat)
        xpos = np.concatenate((xpos, relative_pos.flatten()))
        xrot = np.concatenate((xrot, rot_euler.flatten()))
        pos_rot = np.concatenate((xpos, xrot))
        return pos_rot

    def get_damping(self, sim):
        damping = sim.model.dof_damping[:self.act_dim]
        damping = np.pad(damping, (0, 7 - self.act_dim),
                         mode='constant',
                         constant_values=0)
        return damping.reshape(-1)

    def get_friction(self, sim):
        friction = sim.model.dof_frictionloss[:self.act_dim]
        friction = np.pad(friction, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return friction.reshape(-1)

    def get_body_mass(self, sim):
        body_mass = []
        for body in self.bodies:
            body_id = sim.model._body_name2id[body]
            body_mass.append(sim.model.body_mass[body_id])
        return np.asarray(body_mass).reshape(-1)

    def get_armature(self, sim):
        armature = sim.model.dof_armature[:self.act_dim]
        armature = np.pad(armature, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return armature.reshape(-1)

    def relative_rotation(self, mat1, mat2):
        # return the euler x,y,z of the relative rotation
        # (w.r.t site1 coordinate system) from site2 to site1
        rela_mat = np.dot(np.linalg.inv(mat1), mat2)
        return rotations.mat2euler(rela_mat)

    def close(self):
        pass