Example #1
0
def test_viewer():
    model = load_model_from_path("mujoco_py/tests/test.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    for _ in range(100):
        sim.step()
        viewer.render()
Example #2
0
def test_multiple_sims():
    # Ensure that creating new simulators still produces good renderings.
    xml = """
    <mujoco>
        <asset>
            <texture name="t1" width="32" height="32" type="2d" builtin="flat" />
            <material name="m1" texture="t1" />
        </asset>
        <worldbody>
            <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" />
            <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" />
            <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" />
        </worldbody>
    </mujoco>
    """

    model = load_model_from_xml(xml)
    random_state = np.random.RandomState(0)

    for i in range(3):
        sim = MjSim(model)
        sim.forward()
        modder = TextureModder(sim, random_state=random_state)
        for j in range(2):
            modder.rand_checker('g1')
            compare_imgs(
                sim.render(201, 205, camera_name="topcam"),
                'test_multiple_sims.loop%d_%d.png' % (i, j))
Example #3
0
def test_data_attribute_getters():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    assert_array_equal(sim.data.get_body_xpos("body1"), [0, 0, 1])
    with pytest.raises(ValueError):
        sim.data.get_body_xpos("body_foo")
    with pytest.raises(RuntimeError):
        sim.data.get_xpos("body1")
    assert len(sim.data.get_body_xquat("body1")) == 4
    assert_array_equal(sim.data.get_body_xmat("body1").shape, (3, 3))
    # At (0, 1, 1) since the geom is displaced in the body
    assert_array_equal(sim.data.get_body_xipos("body1"), [0, 1, 1])

    assert_array_equal(sim.data.get_site_xpos("site1"), [1, 0, 1])
    assert_array_equal(sim.data.get_site_xmat("site1").shape, (3, 3))
    assert_array_equal(sim.data.get_geom_xpos("geom1"), [0.5, 0.4, 0.3])
    assert_array_equal(sim.data.get_geom_xpos("geom2"), [0, 1, 1])
    assert_array_equal(sim.data.get_geom_xmat("geom2").shape, (3, 3))
    assert_array_equal(sim.data.get_light_xpos("light1"), [0, 0, 3])
    assert_array_equal(sim.data.get_light_xdir("light1"), [0, 0, -1])
    assert_array_equal(sim.data.get_camera_xpos("camera1"), [3, 0, 0])
    assert_array_equal(sim.data.get_camera_xmat("camera1").shape, (3, 3))

    assert_array_equal(sim.data.get_joint_xaxis("joint1"), [0, 0, 1])
    assert_array_equal(sim.data.get_joint_xanchor("joint1"), [0, 0, 1])
Example #4
0
def test_concurrent_rendering():
    '''Best-effort testing that concurrent multi-threaded rendering works.
    The test has no guarantees around being deterministic, but if it fails
    you know something is wrong with concurrent rendering. If it passes,
    things are probably working.'''
    err = None
    def func(sim, event):
        event.wait()
        sim.data.qpos[:] = 0.0
        sim.forward()
        img1 = sim.render(width=40, height=40, camera_name="camera1")
        img2 = sim.render(width=40, height=40, camera_name="camera2")
        try:
            assert np.sum(img1[:]) == 23255
            assert np.sum(img2[:]) == 12007
        except Exception as e:
            nonlocal err
            err = e

    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.render(100, 100)
    event = Event()
    threads = []
    for _ in range(100):
        thread = Thread(target=func, args=(sim, event))
        threads.append(thread)
        thread.start()
    event.set()
    for thread in threads:
        thread.join()
    assert err is None, "Exception: %s" % (str(err))
Example #5
0
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = np.array(Image.fromarray(img).resize(size=(200, 200)))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #6
0
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = scipy.misc.imresize(img, (200, 200, 3))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #7
0
def test_ignore_mujoco_warnings():
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with ignore_mujoco_warnings():
        # This should raise an exception due to the mujoco warning callback,
        # but it's suppressed by the context manager.
        sim.step()

    sim.reset()
    with pytest.raises(Exception):
        # test to make sure previous warning callback restored.
        sim.step()
Example #8
0
def test_viewercontext():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    renderer = cymj.MjRenderContext(sim, offscreen=True)
    renderer.add_marker(type=const.GEOM_SPHERE,
                        size=np.ones(3) * 0.1,
                        pos=np.zeros(3),
                        mat=np.eye(3).flatten(),
                        rgba=np.ones(4),
                        label="mark")
Example #9
0
def test_mj_sim_basics():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model, nsubsteps=2)

    sim.reset()
    sim.step()
    sim.reset()
    sim.forward()
Example #10
0
def test_read_depth_buffer():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    ctx = MjRenderContext(sim, offscreen=True, opengl_backend='glfw')

    buf = np.zeros((11, 100), dtype=np.float32)
    assert buf.sum() == 0, f'{buf.sum()}'

    ctx.render(buf.shape[1], buf.shape[0], 0)
    ctx.read_pixels_depth(buf)
    assert buf.sum() != 0, f'{buf.sum()} {buf.max()}'
Example #11
0
def test_glfw_context():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    render_context = MjRenderContext(sim, offscreen=True, opengl_backend='glfw')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
    assert isinstance(render_context.opengl_context, GlfwContext)

    compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_glfw_context.png')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
Example #12
0
def test_joint_qpos_qvel_ops():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    # Test setting one with a list
    sim.data.set_joint_qpos("joint1", [1, 2, 3, 1, 0, 0, 0])
    # And the other with an np.ndarray
    sim.data.set_joint_qvel("joint1", np.array([1, 2, 3, 0.1, 0.1, 0.1]))
    sim.forward()
    assert_array_equal(sim.data.get_joint_qpos(
        "joint1"), [1, 2, 3, 1, 0, 0, 0])
    assert_array_equal(sim.data.get_joint_qvel(
        "joint1"), [1, 2, 3, 0.1, 0.1, 0.1])
Example #13
0
    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
Example #14
0
def test_textures():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.premod.png')

    random_state = np.random.RandomState(0)
    modder = TextureModder(sim, random_state=random_state)
    modder.whiten_materials()
    modder.whiten_materials(['g1', 'g2'])

    modder.set_rgb('g1', (255, 0, 0))
    modder.set_rgb('g2', (0, 255, 0))
    modder.set_rgb('g3', (0, 0, 255))
    modder.set_rgb('g4', (255, 0, 255))
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rgb.png')

    modder.set_checker('g1', (255, 0, 0), (0, 255, 0))
    modder.set_gradient('g2', (0, 255, 0), (0, 0, 255), vertical=True)
    modder.set_gradient('g3', (255, 255, 0), (0, 0, 255), vertical=False)
    modder.set_noise('g4', (0, 0, 255), (255, 0, 0), 0.1)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.variety.png')

    modder.rand_checker('g1')
    modder.rand_gradient('g2')
    modder.rand_noise('g3')
    modder.rand_rgb('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_specific.png')

    modder.rand_all('g1')
    modder.rand_all('g2')
    modder.rand_all('g3')
    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_all.png')

    modder.rand_checker('g1')
    modder.rand_checker('g2')
    modder.rand_checker('g3')
    modder.rand_checker('g4')
    mat_modder = MaterialModder(sim, random_state=random_state)
    mat_modder.rand_texrepeat('g1')
    mat_modder.rand_texrepeat('g2')
    mat_modder.rand_texrepeat('g3')
    mat_modder.rand_texrepeat('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_texrepeat.png')
Example #15
0
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)
    compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
Example #16
0
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)

    # Unfortunately mujoco 2.0 renders slightly different depth image on mac and on linux here
    if "darwin" in sys.platform.lower():
        compare_imgs(depth, 'test_rendering.freecam.depth-darwin.png')
    else:
        compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
Example #17
0
    def test_ray(self):
        ''' Test raycasting and exclusions '''
        sim = MjSim(load_model_from_xml(self.xml))
        sim.forward()

        # Include all geoms
        self.check_rays(sim,
                        [0.9, 0.1, 0.9, 0.1, 0.9, 0.1, -1.0],
                        ['A', 'A', 'B', 'B', 'C', 'C', None])

        # Include static geoms, but exclude worldbody (which contains 'A')
        self.check_rays(sim,
                        [2.9, 1.9, 0.9, 0.1, 0.9, 0.1, -1.0],
                        ['B', 'B', 'B', 'B', 'C', 'C', None],
                        exclude_body=0)

        # Include static geoms, and exclude body 1 (which contains 'C')
        self.check_rays(sim,
                        [0.9, 0.1, 0.9, 0.1, -1.0, -1.0, -1.0],
                        ['A', 'A', 'B', 'B', None, None, None],
                        exclude_body=1)

        # Include static geoms, and exclude body 2 (which contains 'B')
        self.check_rays(sim,
                        [0.9, 0.1, 2.9, 1.9, 0.9, 0.1, -1.0],
                        ['A', 'A', 'C', 'C', 'C', 'C', None],
                        exclude_body=2)

        # Exclude static geoms ('A' is the only static geom)
        self.check_rays(sim,
                        [2.9, 1.9, 0.9, 0.1, 0.9, 0.1, -1.0],
                        ['B', 'B', 'B', 'B', 'C', 'C', None],
                        include_static_geoms=False)

        # Exclude static geoms, and exclude body 1 ('C')
        self.check_rays(sim,
                        [2.9, 1.9, 0.9, 0.1, -1.0, -1.0, -1.0],
                        ['B', 'B', 'B', 'B', None, None, None],
                        include_static_geoms=False, exclude_body=1)

        # Exclude static geoms, and exclude body 2 (which contains 'B')
        self.check_rays(sim,
                        [4.9, 3.9, 2.9, 1.9, 0.9, 0.1, -1.0],
                        ['C', 'C', 'C', 'C', 'C', 'C', None],
                        include_static_geoms=False, exclude_body=2)
Example #18
0
def test_mocap_ops():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    assert_array_equal(sim.data.get_body_xpos("mocap1"), [1, 0, 0])
    assert_array_equal(sim.data.get_mocap_pos("mocap1"), [1, 0, 0])
    assert_array_equal(sim.data.get_mocap_quat("mocap1"), [1, 0, 0, 0])
    new_pos = [2, 1, 1]
    new_quat = [0.707107, 0.707107, 0, 0]
    sim.data.set_mocap_pos("mocap1", new_pos)
    sim.data.set_mocap_quat("mocap1", new_quat)
    sim.forward()
    assert_array_equal(sim.data.get_mocap_pos("mocap1"), new_pos)
    assert_array_almost_equal(sim.data.get_mocap_quat("mocap1"), new_quat)
    assert_array_equal(sim.data.get_body_xpos("mocap1"), new_pos)
    assert_array_almost_equal(sim.data.get_body_xquat("mocap1"), new_quat)
    assert_array_almost_equal(sim.data.get_body_xmat("mocap1"),
                              [[1, 0, 0], [0, 0, -1], [0, 1, 0]])
Example #19
0
def test_sim_save():
    model = load_model_from_xml(BASIC_MODEL_XML)
    assert model.nkey == 0
    sim = MjSim(model)

    with StringIO() as f:
        sim.save(f)

        f.seek(0)
        loaded_model = load_model_from_xml(f.read())

        assert loaded_model.nkey == 1

    with BytesIO() as f:
        sim.save(f, format='mjb')

        f.seek(0)
        loaded_model = load_model_from_mjb(f.read())
        assert loaded_model.nkey == 1
Example #20
0
def test_mj_warning_raises():
    ''' Test that MuJoCo warnings cause exceptions. '''
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with pytest.raises(Exception):
        # This should raise an exception due to the mujoco warning callback
        sim.step()
Example #21
0
def test_xvelr():  # xvelr = rotational velocity in world frame
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="0.3"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/>
                    <geom name="geom2" pos="0 0 0" size="0.3"/>
                    <site name="site1" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    sim.forward()
    # Check that xvelr starts out at zero (since qvel is zero)
    site1_xvelr = sim.data.get_site_xvelr('site1')
    np.testing.assert_allclose(site1_xvelr, np.zeros(3))
    # Push the base body and step forward to get it moving
    sim.data.ctrl[0] = 1e9
    sim.step()
    sim.forward()
    # Check that the first body has nonzero xvelr
    body1_xvelr = sim.data.get_body_xvelr('body1')
    assert not np.allclose(body1_xvelr, np.zeros(3))
    # Check that the second body has zero xvelr (still)
    body2_xvelr = sim.data.get_body_xvelr('body2')
    np.testing.assert_allclose(body2_xvelr, np.zeros(3))
    # Check that this matches the batch (gathered) getter property
    np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
Example #22
0
def test_rendering_failing():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    sim.render(100, 100)
    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="blaaaa")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    try:
        compare_imgs(img, 'test_rendering_markers.camera1.png')
        assert False
    except Exception as e:
        pass
Example #23
0
def test_materials():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.premod.png')

    random_state = np.random.RandomState(0)
    modder = MaterialModder(sim, random_state=random_state)

    modder.set_specularity('g1', 1.0)
    modder.set_reflectance('g2', 1.0)
    modder.set_shininess('g3', 1.0)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.props.png')

    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.rand_all.png')
Example #24
0
class Aether:
    """
    Deus do espaço e do paraíso
    Faz a conexão entre o simulador e os módulos do programa
    """
    def __init__(self):
        # DEFINIÇÕES
        self.field_width = 640
        self.field_height = 480
        self.cascadeTime = 0
        self.cascadeLoops = 1
        self.cascadeLastTime = 0

        # PREPARAÇÃO
        model = load_model_from_path("simulator/scene.xml")
        self.sim = MjSim(model)
        self.viewer = Viewer(self.sim, self)
        self.ball_joint = self.sim.model.get_joint_qpos_addr("Ball")[0]
        self.robot_joints = [
            self.sim.model.get_joint_qpos_addr("Robot_01")[0],
            self.sim.model.get_joint_qpos_addr("Robot_02")[0],
            self.sim.model.get_joint_qpos_addr("Robot_03")[0],
            self.sim.model.get_joint_qpos_addr("Robot_04")[0],
            self.sim.model.get_joint_qpos_addr("Robot_05")[0],
            self.sim.model.get_joint_qpos_addr("Robot_06")[0]
        ]

        # EXECUÇÃO
        # prepara os módulos
        self.enabled = [False] * 6  # [False, False, False, True, True, True]
        self.athena = [Athena(), Athena()]
        self.zeus = [Zeus(), Zeus()]
        Endless.setup(self.field_width, self.field_height)
        self.athena[0].setup(3, 0.8)
        self.athena[1].setup(3, 0.8)
        self.zeus[0].setup(3)
        self.zeus[1].setup(3)

        # inicializa o loop dos dados
        self.pause = False
        self.loopThread1 = threading.Thread(target=self.loopTeam, args=[0])
        self.loopThread2 = threading.Thread(target=self.loopTeam, args=[1])
        self.loopThread1.daemon = True
        self.loopThread2.daemon = True
        self.loopThread1.start()
        self.loopThread2.start()

        self.showInfos(0)
        self.showInfos(1)

    def run(self):
        while True:
            self.sim.step()
            self.viewer.render()

    def loopTeam(self, team):
        while True:
            time.sleep(0.0000000001)

            if self.pause or \
                    (not self.enabled[3 * team] and not self.enabled[3 * team + 1] and not self.enabled[3 * team + 2]):
                continue
            # executa nossos módulos
            positions = self.generatePositions(team)
            commands = self.athena[team].getTargets(positions)
            velocities = self.zeus[team].getVelocities(commands)
            # aplica resultados na simulação
            if self.enabled[0 + 3 * team]:
                self.sim.data.ctrl[0 + 6 * team] = self.convertVelocity(
                    velocities[0]["vLeft"])
                self.sim.data.ctrl[1 + 6 * team] = self.convertVelocity(
                    velocities[0]["vRight"])
            if self.enabled[1 + 3 * team]:
                self.sim.data.ctrl[2 + 6 * team] = self.convertVelocity(
                    velocities[1]["vLeft"])
                self.sim.data.ctrl[3 + 6 * team] = self.convertVelocity(
                    velocities[1]["vRight"])
            if self.enabled[2 + 3 * team]:
                self.sim.data.ctrl[4 + 6 * team] = self.convertVelocity(
                    velocities[2]["vLeft"])
                self.sim.data.ctrl[5 + 6 * team] = self.convertVelocity(
                    velocities[2]["vRight"])

            # mostra resultados
            self.showInfos(team, positions, commands)
            # indicadores 3D
            for i in range(3):
                position = positions[0][i]["position"]
                self.setObjectPose("indicator_" + str(i + 3 * team + 1),
                                   position, team, 0.2,
                                   velocities[i]["vector"])
                # self.setObjectPose("virtual_robot_1", velocities["virtualPos"], team=0)

    # HELPERS
    def showInfos(self, team, positions=None, commands=None):
        infos = []

        for i in range(3):
            if self.enabled[i + 3 * team] and positions and commands:
                # informações que todos os robôs tem
                robot = "X: " + "{:.1f}".format(positions[0][i]["position"][0])
                robot += ", Y: " + "{:.1f}".format(
                    positions[0][i]["position"][1])
                robot += ", O: " + "{:.1f}".format(
                    positions[0][i]["orientation"])
                robot += ", T: " + commands[i]["tactics"]
                robot += ", C: " + commands[i]["command"]

                if commands[i]["command"] == "lookAt":
                    if type(commands[i]["data"]["target"]) is tuple:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"][0]) + ", "
                        robot += "{:.1f}".format(
                            commands[i]["data"]["target"][1]) + ")"
                    else:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"]) + ")"

                elif commands[i]["command"] == "goTo":
                    robot += "(" + "{:.1f}".format(
                        commands[i]["data"]["target"]["position"][0]) + ", "
                    robot += "{:.1f}".format(
                        commands[i]["data"]["target"]["position"][1]) + ", "

                    if type(commands[i]["data"]["target"]
                            ["orientation"]) is tuple:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"]["orientation"]
                            [0]) + ", "
                        robot += "{:.1f}".format(commands[i]["data"]["target"]
                                                 ["orientation"][1]) + ") )"
                    else:
                        robot += "{:.1f}".format(
                            commands[i]["data"]["target"]["orientation"]) + ")"

                elif commands[i]["command"] == "spin":
                    robot += "(" + commands[i]["data"]["direction"] + ")"

            else:
                robot = "[OFF]"

            infos.append(robot)

        self.viewer.infos["robots" + str(team + 1)] = infos

        if team == 0:
            if positions:
                self.viewer.infos["ball"] = "X: " + "{:.2f}".format(positions[2]["position"][0]) + ", Y: " + \
                                            "{:.2f}".format(positions[2]["position"][1])
                fps = self.getFPS()
                if fps:
                    self.viewer.infos["fps"] = fps

            if commands:
                # indicadores 3D
                # print(commands[0]["futureBall"])
                self.setObjectPose("virtual_ball", commands[0]["futureBall"],
                                   0, 0.022)
                for i in range(3):
                    if commands[i]["command"] == "goTo":
                        target = commands[i]["data"]["target"]["position"]
                        targetOrientation = commands[i]["data"]["target"][
                            "orientation"]
                        if type(targetOrientation) is tuple:
                            position = positions[0][i]["position"]
                            targetOrientation = math.atan2(
                                position[1] - targetOrientation[1],
                                -(position[0] - targetOrientation[0]))

                        self.setObjectPose("target_" + str(i + 1), target, 0,
                                           0.01, targetOrientation)

    def getFPS(self):
        # calcula o fps e manda pra interface
        self.cascadeTime += time.time() - self.cascadeLastTime
        self.cascadeLoops += 1
        self.cascadeLastTime = time.time()
        if self.cascadeTime > 1:
            fps = self.cascadeLoops / self.cascadeTime
            self.cascadeTime = self.cascadeLoops = 0
            return "{:.2f}".format(fps)
        return None

    # FUNÇÕES
    def reset(self):
        for i in range(6):
            self.enabled[i] = False

        self.showInfos(0)
        self.showInfos(1)
        self.sim.reset()

    def startStop(self, pause):
        self.pause = pause

    def moveBall(self, direction, keepVel=False):
        if not keepVel:
            for i in range(6):
                self.sim.data.qvel[self.ball_joint + i] = 0

        if direction == 0:  # UP
            self.sim.data.qpos[self.ball_joint + 1] += 0.01
        elif direction == 1:  # DOWN
            self.sim.data.qpos[self.ball_joint + 1] -= 0.01
        elif direction == 2:  # LEFT
            self.sim.data.qpos[self.ball_joint] -= 0.01
        elif direction == 3:  # RIGHT
            self.sim.data.qpos[self.ball_joint] += 0.01

    def toggleRobot(self, robotId, moveOut=False):
        if self.sim.data.qpos[self.robot_joints[robotId] + 1] >= 1:
            self.enabled[robotId] = False
            if moveOut:
                self.sim.data.qpos[self.robot_joints[robotId] + 1] = 0
        elif moveOut:
            self.enabled[robotId] = False
            self.sim.data.qpos[
                self.robot_joints[robotId]] = -0.62 + 0.25 * robotId
            self.sim.data.qpos[self.robot_joints[robotId] + 1] = 1.5
            self.sim.data.qpos[self.robot_joints[robotId] + 2] = 0.04
            self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0
        elif self.enabled[robotId]:
            self.enabled[robotId] = False
            self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0
        else:
            self.enabled[robotId] = True

        self.showInfos(0 if robotId < 3 else 1)

    def convertPositionX(self, coord, team):
        """Traz o valor pra positivo e multiplica pela proporção (largura máxima)/(posição x máxima)

        Args:
             coord: Coordenada da posição no mundo da simulação a ser convertida
             team: Time que está pedindo a conversão (0 ou 1)
        Returns:
            Coordenada da posição na proporção utilizada pela estratégia
        """
        if team == 0:
            return (coord +
                    0.8083874182591296) * self.field_width / 1.6167748365182593
        else:
            return -(coord - 0.8083874182591296
                     ) * self.field_width / 1.6167748365182593

    def convertPositionY(self, coord, team):
        """Traz o valor pra positivo e multiplica pela proporção (altura máxima)/(posição y máxima)

        Args:
            coord: Coordenada da posição no mundo da simulação a ser convertida
            team: Time que está pedindo a conversão (0 ou 1)

        Returns:
            Coordenada da posição na proporção utilizada pela estratégia
        """
        if team == 0:
            return (coord + 0.58339083) * self.field_height / 1.16678166
        else:
            return -(coord - 0.58339083) * self.field_height / 1.16678166

    @staticmethod
    def convertVelocity(vel):
        return vel * 30

    def setObjectPose(self,
                      objectName,
                      newPos,
                      team=0,
                      height=0.04,
                      newOrientation=0):
        """Seta a posição e orientação de um objeto no simulador
        Args:
            objectName: Nome do objeto a ter a pose alterada. Esse nome deve ser de um mocap configurado na cena.
                        Se o objeto for virtual_robot_i, o robô é amarelo se i <= 3, azul caso contrário
            newPos: (x, y), 'x' e 'y' valores em pixels
            team: índice do time (valor em pixels inverte de acordo com o time)
            height: altura do objeto no universo
            newOrientation: orientação Z em radianos do objeto
        """
        if team == 0:
            x = (newPos[0] /
                 self.field_width) * 1.6167748365182593 - 0.8083874182591296
            y = (newPos[1] / self.field_height) * 1.16678166 - 0.58339083
        else:
            x = -(newPos[0] /
                  self.field_width) * 1.6167748365182593 + 0.8083874182591296
            y = -(newPos[1] / self.field_height) * 1.16678166 + 0.58339083

        # conversão de eulerAngles para quaternions (wikipedia)
        newQuat = [
            math.sin(newOrientation / 2), 0, 0,
            math.cos(newOrientation / 2)
        ]

        self.sim.data.set_mocap_quat(objectName, newQuat)
        self.sim.data.set_mocap_pos(objectName, np.array([x, y, height]))

    def generatePositions(self, team):
        """Cria o vetor de posições no formato esperado pela estratégia
        O 'sim.data.qpos' possui, em cada posição, o seguinte:
            0: pos X
            1: pos Y
            2: pos Z
            3: quat component w
            4: quat component x
            5: quat component y
            6: quat component z

        Returns:
            Vetor de posições no formato correto
        """
        r1 = math.pi * team - math.atan2(
            2 * (self.sim.data.qpos[self.robot_joints[3 * team] + 3] *
                 self.sim.data.qpos[self.robot_joints[3 * team] + 6] +
                 self.sim.data.qpos[self.robot_joints[3 * team] + 4] *
                 self.sim.data.qpos[self.robot_joints[3 * team] + 6]), 1 - 2 *
            (self.sim.data.qpos[self.robot_joints[3 * team] + 5] *
             self.sim.data.qpos[self.robot_joints[3 * team] + 5] +
             self.sim.data.qpos[self.robot_joints[3 * team] + 6] *
             self.sim.data.qpos[self.robot_joints[3 * team] + 6]))
        r2 = math.pi * team - math.atan2(
            2 *
            (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 3] *
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] +
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 4] *
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6]), 1 - 2 *
            (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] *
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] +
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] *
             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6]))
        r3 = math.pi * team - math.atan2(
            2 *
            (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 3] *
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] +
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 4] *
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6]), 1 - 2 *
            (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] *
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] +
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] *
             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6]))
        return [
            [  # robôs aliados
                {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[3 * team]], team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[3 * team] + 1],
                         team)),
                    "orientation":
                    r1,
                    "robotLetter":
                    "A"
                }, {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[1 + 3 * team]],
                        team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[1 + 3 * team] +
                                            1], team)),
                    "orientation":
                    r2,
                    "robotLetter":
                    "B"
                }, {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[2 + 3 * team]],
                        team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[2 + 3 * team] +
                                            1], team)),
                    "orientation":
                    r3,
                    "robotLetter":
                    "C"
                }
            ],
            [  # robôs adversários
                {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(3 + 3 * team) %
                                                             6]], team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[(3 + 3 * team) %
                                                              6] + 1], team)),
                }, {
                    "position": (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(4 + 3 * team) %
                                                             6]], team),
                                 self.convertPositionY(
                                     self.sim.data.qpos[self.robot_joints[
                                         (4 + 3 * team) % 6] + 1], team)),
                }, {
                    "position": (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(5 + 3 * team) %
                                                             6]], team),
                                 self.convertPositionY(
                                     self.sim.data.qpos[self.robot_joints[
                                         (5 + 3 * team) % 6] + 1], team)),
                }
            ],
            {  # bola
                "position":
                (self.convertPositionX(self.sim.data.qpos[self.ball_joint],
                                       team),
                 self.convertPositionY(self.sim.data.qpos[self.ball_joint + 1],
                                       team))
            }
        ]
Example #25
0
import mujoco_py
import numpy as np
from gym import spaces
from mujoco_py import load_model_from_path, MjSim, MjViewer

from HCPI.util import seeding

if __name__ == '__main__':
    desp = 'Display Robot'
    parser = argparse.ArgumentParser(description=desp)
    parser.add_argument('--robot_file',
                        type=str,
                        default='../xml/simrobot/6dof/pusher.xml')
    args = parser.parse_args()
    np.set_printoptions(precision=6, suppress=True)
    print('Displaying robot from:', os.path.abspath(args.robot_file))
    model = load_model_from_path(args.robot_file)
    sim = MjSim(model, nsubsteps=20)
    sim.reset()
    sim.step()
    # sim.render(64,64) # observation
    viewer = MjViewer(sim)
    while True:
        ctrl = (np.random.random(8) - 0.5) * 2
        ctrl = np.multiply(ctrl, np.array([40, 2, 2, 9, 9, 0, 0, 0]))
        ctrl[0] = -40
        sim.data.ctrl[:] = ctrl
        sim.step()
        viewer.render()
Example #26
0
def test_sim_state():
    model = load_model_from_xml(BASIC_MODEL_XML)

    foo = 10
    d = {"foo": foo,
         "foo_array": np.array([foo, foo, foo]),
         "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)),
         }

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    state = sim.get_state()
    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state.udd_state.keys():
        if (isinstance(state.udd_state[k], Number)):
            assert state.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state.udd_state[k], sim.udd_state[k])

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8)

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state2.udd_state.keys():
        if (isinstance(state2.udd_state[k], Number)):
            assert state2.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state2.udd_state[k], sim.udd_state[k])

    assert state2 == state
    assert not state2 != state

    # test equality with deleting keys
    state2 = state2._replace(udd_state={"foo": foo})
    assert state2 != state
    assert not (state2 == state)

    # test equality with changing contents of array
    state2 = state2._replace(
        udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])})
    assert state2 != state
    assert not (state2 == state)

    # test equality with adding keys
    d2 = dict(d)
    d2.update({"not_foo": foo})
    state2 = state2._replace(udd_state=d2)
    assert state2 != state
    assert not (state2 == state)

    # test defensive copy
    sim.set_state(state)
    state.qpos[0] = -1
    assert not np.array_equal(state.qpos, sim.data.qpos)

    state3 = sim.get_state()
    state3.qpos[0] = -1
    assert not np.array_equal(state3.qpos, sim.data.qpos)
    state3.udd_state["foo_array"][0] = -1
    assert not np.array_equal(
        state3.udd_state["foo_array"], sim.udd_state["foo_array"])

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    state = sim.get_state()
    print("state.udd_state = %s" % state.udd_state)

    assert state.udd_state == {}

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    assert state.udd_state == sim.udd_state
class Square2dEnv(Env):
    # TODO make this into GoalEnv
    def __init__(self,
                 model_path='./square2d.xml',
                 distance_threshold=1e-1,
                 frame_skip=2,
                 goal=[0.3, 0.3],
                 horizon=100):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.model = load_model_from_path(fullpath)
        self.seed()
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.distance_threshold = distance_threshold
        self.frame_skip = frame_skip
        self.set_goal_location(goal)
        self.reward_type = 'dense'
        self.horizon = horizon
        self.time_step = 0
        obs = self.get_current_observation()
        self.action_space = spaces.Box(-1., 1., shape=(2, ))
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        self.set_ball_location([0., 0.])
        self.sim.forward()
        self.time_step = 0
        return self.get_current_observation()

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def viewer_setup(self):

        self.viewer.cam.lookat[
            0] = 0.0  # x,y,z offset from the object (works if trackbodyid=-1)
        self.viewer.cam.lookat[1] = 0.0
        self.viewer.cam.lookat[2] = 0.0
        self.viewer.cam.elevation = -90  # camera rotation around the axis in the plane going through the frame origin (if 0 you just see a line)
        self.viewer.cam.azimuth = 90
        self.viewer.cam.distance = 1.5

    def set_goal_location(self, goalPos):
        # goal = [xLoc, yLoc]
        assert np.linalg.norm(np.asarray(goalPos) - np.asarray([0.3, 0.3]),
                              axis=-1) < 0.1
        self.sim.data.qpos[0] = goalPos[0]
        self.sim.data.qpos[1] = goalPos[1]

    def set_ball_location(self, ballPos):

        self.sim.data.qpos[2] = ballPos[0]
        self.sim.data.qpos[3] = ballPos[1]

    def set_distance_threshold(self, distance_threshold):
        self.distance_threshold = distance_threshold

    def set_frame_skip(self, frame_skip):
        self.frame_skip = frame_skip

    def get_frame_skip(self):
        return self.frame_skip

    def get_distance_threshold(self):
        return self.distance_threshold

    def get_ball_location(self):
        return self.sim.data.qpos[2:4]

    def get_goal_location(self):
        return self.sim.data.qpos[0:2]

    def get_ball_velocity(self):
        return self.sim.data.qvel[2:4]

    def send_control_command(self, xDirectionControl, yDirectionControl):

        self.sim.data.ctrl[0] = xDirectionControl
        self.sim.data.ctrl[1] = yDirectionControl

    def get_current_observation(self):
        obs = np.concatenate([
            self.get_goal_location(),
            self.get_ball_location(),
            self.get_ball_velocity()
        ]).ravel()
        return obs.copy()
        # obs = np.concatenate([self.get_ball_location(), self.get_ball_velocity()]).ravel()
        # desired_goal = self.get_goal_location()
        # achieved_goal = self.get_ball_location()
        # return {
        #     'observation': obs.copy(),
        #     'achieved_goal': achieved_goal.copy(),
        #     'desired_goal': desired_goal.copy()
        #
        # }

    def get_image_of_goal_observation(self, xLoc=None, yLoc=None):

        if not xLoc:
            xLoc = self.sim.data.qpos[0]

        if not yLoc:
            yLoc = self.sim.data.qpos[1]

        self.sim.data.qpos[0] = xLoc
        self.sim.data.qpos[1] = yLoc
        self.sim.data.qpos[2] = xLoc
        self.sim.data.qpos[3] = yLoc

        self.render()

    def do_simulation(self, ctrl, n_frames):
        self.send_control_command(ctrl[0], ctrl[1])
        for _ in range(n_frames):
            self.take_step()

    def step(self, ctrl):

        if np.linalg.norm(self.get_goal_location() - [0.3, 0.3],
                          axis=-1) > 0.1:
            print(self.get_goal_location())
            # assert False
        ctrl = np.clip(ctrl, -1., 1.)
        self.do_simulation(ctrl, self.frame_skip)
        obs = self.get_current_observation()
        info = {}
        reward = self.compute_reward(self.get_ball_location(),
                                     self.get_goal_location(), {})
        done = (reward == 1.0)
        self.time_step += 1
        if self.time_step >= self.horizon:
            done = True
        return obs, reward, done, info

    def take_step(self):
        self.sim.step()

    def render(self, mode='human'):
        self._get_viewer().render()

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Compute distance between goal and the achieved goal.
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def _is_success(self, achieved_goal, desired_goal):
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return (d < self.distance_threshold).astype(np.float32)

    def log_diagnostics(self, paths):
        pass

    def terminate(self):
        pass
Example #28
0
def test_tableBoundViolation():
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    test1 = np.array([-0.0031, -0.9718, -0.7269, -2.4357, -0.0157, 1.5763, 0.7303]) #False
    test2 = np.array([0.0264, -0.0772, 0.1924, -2.8478, -0.0273, 2.8339, 0.7566]) #True
    test3 = np.array([-1.4870, -1.7289, 1.6138, -1.9814, -0.9856, 1.9304, 0.9981]) #True
    test4 = np.array([-0.5250, -0.6410, 0.1893, -1.3827, -0.2573, 2.1356, 0.7116]) #False
    test5 = np.array([-0.0133, 0.9846, 0.0365, -1.5491, 2.8629, 0.7630, 0.6254]) #True

    qd = np.zeros(7)

    state = MjSimState(time=0,qpos=test1,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test1:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test2,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test2:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test3,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test3:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test4,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test4:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test5,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test5:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
Example #29
0
def main(args):
    if not os.path.isdir(args.result_dir):
        os.makedirs(args.result_dir)
    parent = os.path.dirname(os.path.abspath(__file__))
    # load test xml files
    test_file = os.path.join(
        parent, 'tests/test_xmls/temp_1_{}.pickle'.format(args.case))
    params = pickle.load(open(test_file, 'rb'))
    # params = params[:6]
    if args.shuffle:
        random.shuffle(params)

    num_test = len(params)
    print('                    ++++++++++++++++++++++++++')
    print('                    +++ Now running case {} +++'.format(args.case))
    print('                    ++++++++++++++++++++++++++\n\n')

    # Create our policy net and a target net
    policy_net = DRQN(args.ftdim, args.outdim).to(args.device)
    if args.position:
        tactile_net = TactileNet(args.indim - 6, args.ftdim).to(args.device)
    elif args.force:
        tactile_net = TactileNet(args.indim - 390, args.ftdim).to(args.device)
    else:
        tactile_net = TactileNet(args.indim, args.ftdim).to(args.device)

    # Setup the state normalizer
    normalizer = Multimodal_Normalizer(num_inputs=args.indim,
                                       device=args.device)

    if args.weight_policy:
        checkpoint = torch.load(args.weight_policy)
        policy_net.load_state_dict(checkpoint['policy_net_1'])
    if args.weight_tactile:
        checkpoint = torch.load(args.weight_tactile)
        tactile_net.load_state_dict(checkpoint['tactile_net_1'])
    if args.normalizer_file:
        if os.path.exists(args.normalizer_file):
            normalizer.restore_state(args.normalizer_file)

    # Create robot, reset simulation and grasp handle
    model = load_model_from_path(args.model_path)
    sim = MjSim(model)
    sim_param = SimParameter(sim)
    sim.step()
    if args.render:
        viewer = MjViewer(sim)
    else:
        viewer = None

    robot = RobotSim(sim, viewer, sim_param, args.render, args.break_thresh)

    tactile_obs_space = TactileObs(
        robot.get_gripper_xpos(),  # 24
        robot.get_all_touch_buffer(args.hap_sample))  # 30 x 6

    performance = {
        'time': [],
        'success': [],
        'num_broken': [],
        'tendon_hist': [0, 0, 0, 0, 0],
        'collision': []
    }

    for i in range(num_test):
        velcro_params = params[i]
        geom, origin_offset, euler, radius = velcro_params
        print('\n\nTest {} Velcro parameters are: {}, {}, {}, {}'.format(
            i, geom, origin_offset, euler, radius))
        change_sim(robot.mj_sim, geom, origin_offset, euler, radius)
        robot.reset_simulation()
        ret = robot.grasp_handle()
        performance = test_network(args, policy_net, tactile_net, normalizer,
                                   robot, tactile_obs_space, performance)
        print('Success: {}, time: {}, num_broken: {}, collision:{} '.format(
            performance['success'][-1], performance['time'][-1],
            performance['num_broken'][-1], performance['collision'][-1]))

    print('Finished opening velcro with haptics test \n')
    success = np.array(performance['success'])
    time = np.array(performance['time'])
    print('Successfully opened the velcro in: {}% of cases'.format(
        100 * np.sum(success) / len(performance['success'])))
    print('Average time to open: {}'.format(np.average(time[success > 0])))
    # print('Action histogram for the test is: {}'.format(performance['action_hist']))

    # collision = np.array(performance['collision'])
    # threshold = 3000
    # high_success = float(np.sum(success[collision<threshold])) / float(np.sum(np.ones(num_test)[collision<threshold]))
    # low_success =  float(np.sum(success[collision>threshold])) / float(np.sum(np.ones(num_test)[collision>threshold]))
    # print('high_success: {} low_success: {} '.format(high_success, low_success))

    ablation = 'None'
    if args.position:
        ablation = 'position'
    if args.force:
        ablation = 'force'
    checkpoint = args.weight_policy.split('/')[-1]
    out_fname = 'case{}_{}_{}.txt'.format(args.case, ablation, checkpoint)
    with open(os.path.join(args.result_dir, out_fname), 'w+') as f:
        f.write('Time: {}\n'.format(performance['time']))
        f.write('Success: {}\n'.format(performance['success']))
        f.write('Successfully opened the velcro in: {}% of cases\n'.format(
            100 * np.sum(success) / len(performance['success'])))
        f.write('Average time to open: {}\n'.format(
            np.average(time[success > 0])))
        f.write('Num_broken: {}\n'.format(performance['num_broken']))
        f.write('Tendon histogram: {}\n'.format(performance['tendon_hist']))
        f.write('collision: {}\n'.format(performance['collision']))
        # f.write('high_success: {} low_success: {} '.format(high_success, low_success))
    f.close()
Example #30
0
class MujocoEnv(metaclass=EnvMeta):
    """Initializes a Mujoco Environment."""

    parameters_spec = {}

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        use_camera_obs=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            has_renderer (bool): If true, render the simulation state in 
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes 
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes 
                in camera. False otherwise.

            control_freq (float): how many control signals to receive 
                in every simulated second. This sets the amount of simulation time 
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            use_camera_obs (bool): if True, every observation includes a 
                rendered image.

            camera_name (str): name of camera to be rendered. Must be 
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.viewer = None
        self.model = None

        # settings for camera observations
        self.use_camera_obs = use_camera_obs
        if self.use_camera_obs and not self.has_offscreen_renderer:
            raise ValueError(
                "Camera observations require an offscreen renderer.")
        self.camera_name = camera_name
        if self.use_camera_obs and self.camera_name is None:
            raise ValueError("Must specify camera name when using camera obs")
        self.camera_height = camera_height
        self.camera_width = camera_width
        self.camera_depth = camera_depth

        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def reset(self, **kwargs):
        """Resets simulation."""
        # TODO(yukez): investigate black screen of death
        # if there is an active viewer window, destroy it
        self._destroy_viewer()
        self.reset_props(**kwargs)
        self._reset_internal()

        self.sim.forward()
        return self._get_observation()

    def reset_props(self):
        print(
            'INFO from GZZ: this is the base class reset_props. This means the environment does not support domain randomization'
        )

    def init_viewer(self):
        print('init_viewer', self.viewer)

        if self.has_offscreen_renderer:
            print(
                'WARNING from GZZ: robosuite has a bug on simutaneously rendering for both offscreen (like camera obs) and onscreen (X window), and will give (at least) onscreen `black screen of death` after a reset. Please check their issue: https://github.com/StanfordVL/robosuite/issues/25 . I failed to help them debug on this.'
            )

        self.viewer = MujocoPyRenderer(self.sim)
        self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh
                                                else 0)
        self.viewer.viewer.vopt.geomgroup[
            1] = 1 if self.render_visual_mesh else 0

        # hiding the overlay speeds up rendering significantly
        self.viewer.viewer._hide_overlay = True

    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # instantiate simulation from MJCF model
        self._load_model()
        self.mjpy_model = self.model.get_model(mode="mujoco_py")
        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.init_viewer()

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """Returns an OrderedDict containing observations [(name_string, np.array), ...]."""
        return OrderedDict()

    def step(self, action):
        """Takes a step in simulation with control command @action."""
        if self.done:
            raise ValueError("executing action in terminated episode")
        self.timestep += 1
        self._pre_action(action)
        end_time = self.cur_time + self.control_timestep
        while self.cur_time < end_time:
            self.sim.step()
            self.cur_time += self.model_timestep
        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action):
        """Do any preprocessing before taking an action."""
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """Do any housekeeping after taking an action."""
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """Reward should be a function of state and action."""
        return 0

    def render(self):
        """
        Renders to an on-screen window.
        """
        if self.viewer is None:
            self.has_renderer = True
            self.init_viewer()

        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        """
        observation = self._get_observation()
        return observation

        # observation_spec = OrderedDict()
        # for k, v in observation.items():
        #     observation_spec[k] = v.shape
        # return observation_spec

    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """Reloads the environment from an XML description of the environment."""

        # if there is an active viewer window, destroy it
        self.close()

        # load model from xml
        self.mjpy_model = load_model_from_xml(xml_string)

        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)
        if self.has_renderer and self.viewer is None:
            self.init_viewer()

        elif self.has_offscreen_renderer:
            render_context = MjRenderContextOffscreen(self.sim)
            render_context.vopt.geomgroup[
                0] = 1 if self.render_collision_mesh else 0
            render_context.vopt.geomgroup[
                1] = 1 if self.render_visual_mesh else 0
            self.sim.add_render_context(render_context)

        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # necessary to refresh MjData
        self.sim.forward()

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1: a list of geom names (string)
            geoms_2: another list of geom names (string)

        Returns:
            iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0:self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_contact(self):
        """Returns True if gripper is in contact with an object."""
        return False

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        return False

    def _destroy_viewer(self):
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            print('_destroy_viewer', self.viewer)
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
Example #31
0
        <body name="box" pos="-0.8 0 0.2">
            <geom mass="0.1" size="0.15 0.15 0.15" type="box"/>
        </body>
        <body name="floor" pos="0 0 0.025">
            <geom condim="3" size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/>
        </body>
    </worldbody>
    <actuator>
        <motor gear="2000.0" joint="slide0"/>
        <motor gear="2000.0" joint="slide1"/>
    </actuator>
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
# viewer = MjViewer(sim)
# t = 0
# while True:
#     sim.data.ctrl[0] = math.cos(t / 10.) * 0.01
#     sim.data.ctrl[1] = math.sin(t / 10.) * 0.01
#     t += 1
#     sim.step()
#     viewer.render()
#     if t > 100 and os.getenv('TESTING') is not None:
#         break

renderer = MjBatchRenderer(100, 100, use_cuda=True)
renderer.render(sim)
renderer.map()
image = renderer.read()
Example #32
0
# Initializing servos
servo1 = Servo(coordinates=(0, 0, 0.5))
servo2 = Servo(coordinates=(0, 0, 0))
# servo3 = Servo(coordinates = (0, 0, 0))

# Joint connecting the servos
servo2.attach_to_servo(servo1)
# servo3.join_servo(servo2, 1)

# Building the world
worldbody = World_body()
servo1_body = Servo_body(servo1)
servo2_body = Servo_body(servo2)
servo1_body.append_servo_body(servo2_body)
worldbody.append_servo_body(servo1_body)
build_xml_file(worldbody.root)

TEST_XML = worldbody.get_root_str()

# Building the model
model = load_model_from_xml(TEST_XML)
sim = MjSim(model)
viewer = MjViewer(sim)
t = 0
while True:
    t += 1
    sim.step()
    viewer.render()
    if t > 100 and os.getenv('TESTING') is not None:
        break
 def get_sim(seed):
     geom_type = ["box", "sphere"][seed % 2]
     model = load_model_from_xml(xml.format(geom_type=geom_type))
     return MjSim(model)
Example #34
0
from mujoco_py import load_model_from_path, MjSim, MjViewer, functions
import mujoco_py.generated.const as const

# Load the model as defined in:
# https://github.com/deepmind/dm_control/blob/master/dm_control/suite/humanoid.xml
MODEL_PATH = '../assets/examples/dm/humanoid.xml'
model = load_model_from_path(MODEL_PATH)


def render_callback(sim, viewer):
    pass


sim = MjSim(model, render_callback=render_callback)
viewer = MjViewer(sim)

t = 0
while True:

    # All constants, including enum accessors are listed under:
    # https://github.com/openai/mujoco-py/blob/master/mujoco_py/generated/const.py

    # An MjSim instance has multiple properties.
    # See: https://github.com/openai/mujoco-py/blob/master/mujoco_py/mjsim.pyx
    # For a cython file, properties are listed as cdef expressions

    # sim.get_state()
    # Returns an MjSimState instance
    # See: https://github.com/openai/mujoco-py/blob/master/mujoco_py/mjsimstate.pyx
    # Has properties: time, qpos, qvel, act, uddstate.
    # These are generally just convenience properties pulled out of PyMjModel and PyMjData,
Example #35
0
        <motor gear="100" joint="right_hip" name="right_hip"/>
        <motor gear="200" joint="right_knee" name="right_knee"/>
        <motor gear="100" joint="left_hip" name="left_hip"/>
        <motor gear="200" joint="left_knee" name="left_knee"/>
        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
        <motor gear="25" joint="right_elbow" name="right_elbow"/>
        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
        <motor gear="25" joint="left_elbow" name="left_elbow"/>
    </actuator>
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)

x = np.load('results/k_tree.npy')

sim_state = sim.get_state()
print(sim_state.qpos)

step = 0
while True:
    sim_state = sim.get_state()
    #for i in range(3, 7):
    #    sim_state.qpos[i] = y[i]
    print(sim_state.qpos)

    for i in range(10, 24):
Example #36
0
class BaseMujocoEnv(BaseEnv):
    def __init__(self, model_path, _hp):
        self._frame_height = _hp.viewer_image_height
        self._frame_width = _hp.viewer_image_width

        self._reset_sim(model_path)

        self._base_adim, self._base_sdim = None, None  #state/action dimension of Mujoco control
        self._adim, self._sdim = None, None  #state/action dimension presented to agent
        self.num_objects, self._n_joints = None, None
        self._goal_obj_pose = None
        self._goaldistances = []

        self._ncam = _hp.ncam
        if self._ncam == 2:
            self.cameras = ['maincam', 'leftcam']
        elif self._ncam == 1:
            self.cameras = ['maincam']
        else:
            raise ValueError

        self._last_obs = None
        self._hp = _hp

    def _default_hparams(self):
        parent_params = super()._default_hparams()
        parent_params.add_hparam('viewer_image_height', 480)
        parent_params.add_hparam('viewer_image_width', 640)
        parent_params.add_hparam('ncam', 1)

        return parent_params

    def set_goal_obj_pose(self, pose):
        self._goal_obj_pose = pose

    def _reset_sim(self, model_path):
        """
        Creates a MjSim from passed in model_path
        :param model_path: Absolute path to model file
        :return: None
        """
        self._model_path = model_path
        self.sim = MjSim(load_model_from_path(self._model_path))

    def reset(self):
        self._goaldistances = []

    def render(self):
        """ Renders the enviornment.
        Implements custom rendering support. If mode is:

        - dual: renders both left and main cameras
        - left: renders only left camera
        - main: renders only main (front) camera
        :param mode: Mode to render with (dual by default)
        :return: uint8 numpy array with rendering from sim
        """
        images = np.zeros(
            (self._ncam, self._frame_height, self._frame_width, 3),
            dtype=np.uint8)
        for i, cam in enumerate(self.cameras):
            images[i] = self.sim.render(self._frame_width,
                                        self._frame_height,
                                        camera_name=cam)
        return images

    def project_point(self, point, camera):
        model_matrix = np.zeros((4, 4))
        model_matrix[:3, :3] = self.sim.data.get_camera_xmat(camera).T
        model_matrix[-1, -1] = 1

        fovy_radians = np.deg2rad(
            self.sim.model.cam_fovy[self.sim.model.camera_name2id(camera)])
        uh = 1. / np.tan(fovy_radians / 2)
        uw = uh / (self._frame_width / self._frame_height)
        extent = self.sim.model.stat.extent
        far, near = self.sim.model.vis.map.zfar * extent, self.sim.model.vis.map.znear * extent
        view_matrix = np.array([
            [uw, 0., 0., 0.],  # matrix definition from
            [
                0., uh, 0., 0.
            ],  # https://stackoverflow.com/questions/18404890/how-to-build-perspective-projection-matrix-no-api
            [0., 0., far / (far - near), -1.],
            [0., 0., -2 * far * near / (far - near), 0.]
        ])  # Note Mujoco doubles this quantity

        MVP_matrix = view_matrix.dot(model_matrix)
        world_coord = np.ones((4, 1))
        world_coord[:3, 0] = point - self.sim.data.get_camera_xpos(camera)

        clip = MVP_matrix.dot(world_coord)
        ndc = clip[:3] / clip[3]  # everything should now be in -1 to 1!!
        col, row = (ndc[0] + 1) * self._frame_width / 2, (
            -ndc[1] + 1) * self._frame_height / 2

        return self._frame_height - row, col  # rendering flipped around in height

    def get_desig_pix(self, target_width, round=True, obj_poses=None):
        qpos_dim = self._n_joints  # the states contains pos and vel
        assert self.sim.data.qpos.shape[0] == qpos_dim + 7 * self.num_objects
        desig_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int)
        ratio = self._frame_width / target_width
        for icam, cam in enumerate(self.cameras):
            for i in range(self.num_objects):
                if obj_poses is None:
                    fullpose = self.sim.data.qpos[i * 7 +
                                                  qpos_dim:(i + 1) * 7 +
                                                  qpos_dim].squeeze()
                    chosen_point = fullpose[:3]
                else:
                    chosen_point = obj_poses[i, :3]
                d = self.project_point(chosen_point, cam)
                d = np.stack(d) / ratio
                if round:
                    d = np.around(d).astype(np.int)
                desig_pix[icam, i] = d.squeeze()
        return desig_pix

    def get_goal_pix(self, target_width, round=True):
        goal_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int)
        ratio = self._frame_width / target_width
        for icam, cam in enumerate(self.cameras):
            for i in range(self.num_objects):
                g = self.project_point(self._goal_obj_pose[i, :3], cam)
                g = np.stack(g) / ratio
                if round:
                    g = np.around(g).astype(np.int)
                goal_pix[icam, i] = g.squeeze()
        return goal_pix

    def eval(self, target_width=None, save_dir=None, ntasks=None):
        self._goaldistances.append(self.get_distance_score())
        stats = {}
        stats['improvement'] = self._goaldistances[0] - self._goaldistances[-1]
        stats['initial_dist'] = self._goaldistances[0]
        stats['final_dist'] = self._goaldistances[-1]
        return stats

    def get_distance_score(self):
        """
        :return:  mean of the distances between all objects and goals
        """
        abs_distances = []
        for i_ob in range(self.num_objects):
            goal_pos = self._goal_obj_pose[i_ob, :3]
            curr_pos = self.sim.data.qpos[self._n_joints +
                                          i_ob * 7:self._n_joints + 3 +
                                          i_ob * 7]
            abs_distances.append(np.linalg.norm(goal_pos - curr_pos))
        return np.mean(np.array(abs_distances))

    def snapshot_noarm(self):
        raise NotImplementedError

    @property
    def adim(self):
        return self._adim

    @property
    def sdim(self):
        return self._sdim

    @property
    def ncam(self):
        return self._ncam
Example #37
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Example #38
0
class Dribble_Env(object):
    def __init__(self):
        self.model = load_model_from_path("./xml/world5.xml") 
        self.sim = MjSim(self.model)
        # self.viewer = MyMjViewer(self.sim)
        self.viewer = MyMjViewer(self.sim)
        self.max_vel = [-1000,1000]
        self.x_motor = 0
        self.y_motor = 0
        self.date_time = datetime.datetime.now()
        self.path = "./datas/path_date" + str(self.date_time.strftime("_%Y%m%d_%H%M%S"))
        os.mkdir(self.path)

    def step(self,action):
        self.x_motor = ((action %3)-1) * 200
        self.y_motor = ((action//3)-1) * 200
        self.sim.data.ctrl[0] = self.x_motor 
        self.sim.data.ctrl[1] = self.y_motor
        self.sim.step()

    def get_state(self):
        robot_x, robot_y = self.sim.data.body_xpos[1][0:2]
        robot_xv, robot_yv = self.sim.data.qvel[0:2]
        ball_x, ball_y = self.sim.data.body_xpos[2][0:2]
        ball_xv, ball_yv = self.sim.data.qvel[2:4]
        ball_pos_local = -(robot_x - ball_x), -(robot_y - ball_y)
        object1_x, object1_y= self.sim.data.body_xpos[3][0:2]
        object2_x, object2_y= self.sim.data.body_xpos[4][0:2]
        object3_x, object3_y= self.sim.data.body_xpos[5][0:2]
        object4_x, object4_y= self.sim.data.body_xpos[6][0:2]
        closest_object_id = np.argmin([np.linalg.norm([object1_x - robot_x, object1_y - robot_y]),\
                                      np.linalg.norm([object2_x - robot_x, object2_y - robot_y]),\
                                      np.linalg.norm([object3_x - robot_x, object3_y - robot_y]),\
                                      np.linalg.norm([object4_x - robot_x, object4_y - robot_y])])
        
        object_local_x = -(robot_x - self.sim.data.body_xpos[closest_object_id+3][0])
        object_local_y = -(robot_y - self.sim.data.body_xpos[closest_object_id+3][1])

        return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \
                robot_xv, robot_yv,object_local_x,object_local_y,ball_x, ball_y,ball_xv,ball_yv]

    def check_done(self):
        ball_x ,ball_y = self.get_state()[8:10]
        if ball_x > 80 and -25 < ball_y < 25:
            return True
        else:
            return False

    def check_wall(self):
        ball_x, ball_y = self.get_state()[8:10]
        if math.fabs(ball_y) > 51:
            return True
        elif ball_x > 81 and math.fabs(ball_y) > 25:
            return True
        else:
            return False
        
    def check_avoidaince(self,object_num=4):
        for i in range(object_num):
            if math.fabs(self.sim.data.qvel[5+i*3]) > 0.1 or math.fabs(self.sim.data.qvel[6+3*i]) > 0.1:
                return True
        return False

    def reset(self):
        self.x_motor = 0
        self.y_motor = 0

        self.robot_x_data = []
        self.robot_y_data = []
        self.ball_x_data = []
        self.ball_y_data = []
        self.sim.reset()
        # self.sim.data.qpos[0] = np.random.randint(-3,3)
        self.sim.data.qpos[1] = np.random.randint(-5,5)

    def render(self):
        self.viewer.render()

    def plot_data(self,step,t,done,episode,flag,reward):
        self.field_x = [-90,-90,90,90,-90]
        self.field_y = [-60,60,60,-60,-60]
        self.robot_x_data.append(self.sim.data.body_xpos[1][0])
        self.robot_y_data.append(self.sim.data.body_xpos[1][1])
        self.ball_x_data.append(self.sim.data.body_xpos[2][0])
        self.ball_y_data.append(self.sim.data.body_xpos[2][1])

        datas = str(self.robot_x_data[-1])+" "+str(self.robot_y_data[-1])+" "+str(self.ball_x_data[-1])+" "+str(self.ball_y_data[-1])+" "+str(reward)
        with open(self.path + '/plotdata_' + str(episode+1).zfill(4)+ '.txt','a') as f:
            f.write(str(datas)+'\n')
        
        if (t >= step-1 or done) and flag:
            fig1 = plt.figure()
            plt.ion()
            plt.show()
            plt.plot(self.ball_x_data,self.ball_y_data,marker='o',markersize=2,color="red",label="ball")
            plt.plot(self.robot_x_data,self.robot_y_data,marker="o",markersize=2,color='blue',label="robot")
            plt.plot(self.sim.data.body_xpos[3][0],self.sim.data.body_xpos[3][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[4][0],self.sim.data.body_xpos[4][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[5][0],self.sim.data.body_xpos[5][1],marker="o",markersize=8,color='black')
            plt.plot(self.sim.data.body_xpos[6][0],self.sim.data.body_xpos[6][1],marker="o",markersize=8,color='black')
            plt.plot(self.field_x,self.field_y,markersize=1,color="black")
            plt.plot(80,0,marker="X",color="green",label="goal")
            plt.legend(loc="lower right")
            plt.draw()
            plt.pause(0.001)
            plt.close(1)
Example #39
0
def test_inverse_kinematics():
    panda_kinematics = ikpy_panda_kinematics.panda_kinematics()
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    timestep = generatePatternedTrajectories.TIMESTEP
    control_freq = 1/timestep
    total_time = 3
    num_cycles = int(total_time * control_freq)
    qd_init = np.zeros(7)
    plt.ion()
    LW = 1.0
    fig = plt.figure(figsize=(4,15))
    axes = []
    lines = []
    goals = []
    min_vals = {'x': -0.5, 'y': -0.5, 'z': 0.2}
    max_vals = {'x': 0.5, 'y': 0.5, 'z': 0.7}
    ylabels = ["x(m)", "y(m)", "z(m)"]
    ylbounds = [min_vals['x'], min_vals['y'], min_vals['z']]
    yubounds = [max_vals['x'], max_vals['y'], max_vals['z']]
    for i in range(3):
        axes.append(fig.add_subplot(3,1,i+1))
        lines.append(axes[i].plot([],[],'b-', lw=LW)[0])
        goals.append(axes[i].plot([],[],'r-', lw=LW)[0])
        axes[i].set_ylim([ylbounds[i], yubounds[i]])
        axes[i].set_xlim([0,total_time])
        axes[i].set_ylabel(ylabels[i], fontsize=8)
        axes[i].set_xlabel("Time(s)", fontsize=8)

    for test in range(5):
        x_target = np.random.rand() * (max_vals['x'] - min_vals['x']) + min_vals['x']
        y_target = np.random.rand() * (max_vals['y'] - min_vals['y']) + min_vals['y']
        z_target = np.random.rand() * (max_vals['z'] - min_vals['z']) + min_vals['z']
        if\
            min_vals['x'] > x_target or max_vals['x'] < x_target or \
            min_vals['y'] > y_target or max_vals['y'] < y_target or \
            min_vals['z'] > z_target or max_vals['z'] < z_target:
            print("Error! 'xpos' out of range!")
            print("x = %.3f\ny = %.3f\nz = %.3f" % (x_target, y_target, z_target))

            print(max_vals['y'] - min_vals['y'])
            return
        # x_target, y_target, z_target = 0.088, -0.0, 0.926
        # roll = np.random.rand() * np.pi - np.pi/2
        # pitch = np.random.rand() * np.pi - np.pi/2
        # yaw = np.random.rand() * np.pi - np.pi/2
        roll = 0
        pitch = 0
        yaw = np.pi
        ee_goal = [x_target, y_target, z_target, roll, pitch, yaw]

        # temp = bounds.getRandPosInBounds()
        # ee_goal = panda_kinematics.forward_kinematics(temp)
        q_init = bounds.getRandPosInBounds()
        q_goal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q_init)

        for g in range(3):
            goals[g].set_ydata(np.ones(num_cycles) * ee_goal[g])
            goals[g].set_xdata(np.linspace(0,3,num_cycles))
        sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={}))
        sim.step()
        sim_time = 0
        for i in range(num_cycles):
            state = sim.get_state()
            q = state[1]
            qd = state[2]
            sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=q_goal)
            sim.step()
            viewer.render()
            if i % 70 == 0:
                xpos = panda_kinematics.forward_kinematics(q)
                for a in range(3):
                    lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time))
                    lines[a].set_ydata(np.append(lines[a].get_ydata(), xpos[a]))
                fig.canvas.draw()
                fig.canvas.flush_events()
                print("[q\t]:{}".format(np.around(q,3)))
                print("[qgoal\t]:{}".format(np.around(q_goal,3)))
                print("[qgoal2\t]:{}".format(np.around(panda_kinematics.inverse_kinematics(ee_goal[0:3], ee_goal[3:6]),3)))
                print("[EE\t]:{}".format(np.around(panda_kinematics.forward_kinematics(q),3)))
                print("[EEgoal\t]:{}".format(np.around(ee_goal,3)))
            sim_time += timestep
        # panda_kinematics.plot_stick_figure(q)
        for i in range(3):
            lines[i].set_xdata([])
            lines[i].set_ydata([])
        time.sleep(1)
    def __init__(self,
                 model_name,
                 goal_space_train,
                 goal_space_test,
                 project_state_to_end_goal,
                 end_goal_thresholds,
                 initial_state_space,
                 subgoal_bounds,
                 project_state_to_subgoal,
                 subgoal_thresholds,
                 max_actions=1200,
                 num_frames_skip=10,
                 show=False):

        self.name = model_name

        # Create Mujoco Simulation
        self.model = load_model_from_path("./mujoco_files/" + model_name)
        self.sim = MjSim(self.model)

        # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks
        if model_name == "pendulum.xml":
            self.state_dim = 2 * len(self.sim.data.qpos) + len(
                self.sim.data.qvel)
        else:
            self.state_dim = len(self.sim.data.qpos) + len(
                self.sim.data.qvel
            )  # State will include (i) joint angles and (ii) joint velocities
        self.action_dim = len(
            self.sim.model.actuator_ctrlrange)  # low-level action dim
        self.action_bounds = self.sim.model.actuator_ctrlrange[:,
                                                               1]  # low-level action bounds
        self.action_offset = np.zeros((len(
            self.action_bounds)))  # Assumes symmetric low-level action ranges
        self.end_goal_dim = len(goal_space_test)
        self.subgoal_dim = len(subgoal_bounds)
        self.subgoal_bounds = subgoal_bounds

        # Projection functions
        self.project_state_to_end_goal = project_state_to_end_goal
        self.project_state_to_subgoal = project_state_to_subgoal

        # Convert subgoal bounds to symmetric bounds and offset.  Need these to properly configure subgoal actor networks
        self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds)))
        self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds)))

        for i in range(len(self.subgoal_bounds)):
            self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] -
                                                self.subgoal_bounds[i][0]) / 2
            self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][
                1] - self.subgoal_bounds_symmetric[i]

        # End goal/subgoal thresholds
        self.end_goal_thresholds = end_goal_thresholds
        self.subgoal_thresholds = subgoal_thresholds

        # Set inital state and goal state spaces
        self.initial_state_space = initial_state_space
        self.goal_space_train = goal_space_train
        self.goal_space_test = goal_space_test
        self.subgoal_colors = [
            "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon",
            "Gray", "White", "Black"
        ]

        self.max_actions = max_actions

        # Implement visualization if necessary
        self.visualize = show  # Visualization boolean
        if self.visualize:
            self.viewer = MjViewer(self.sim)
        self.num_frames_skip = num_frames_skip
Example #41
0
#!/usr/bin/env python3
"""
Shows how to toss a capsule to a container.
"""
from mujoco_py import load_model_from_path, MjSim, MjViewer
import os
import scipy.misc
import cv2
import matplotlib.pyplot as plt
import numpy as np

model = load_model_from_path("model.xml")
sim = MjSim(model)

viewer = MjViewer(sim)

sim_state = sim.get_state()
count = 1


def get_image_ball_center(image_src):
    src = image_src
    im_width = image_src.shape[0]
    im_height = image_src.shape[1]
    #grayimg = np.zeros((im_width,im_height),dtype=np.int8)
    pixcount = 0
    pixwx = 0.0
    pixwy = 0.0
    #===================================================================
    grayimg = np.zeros((256, 256), dtype=np.int8)
    for i in range(0, 256):
Example #42
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.buffer_size = (1600, 1280)

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': 60,
        }

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if (
            type(observation) is tuple) else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low, high = bounds[:, 0], bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        self.observation_space = spaces.Box(-high, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ------------------------------------------------------------------------

    def reset_model(self):
        """Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """Called when the viewer is initialized and after every reset.
        Optionally implement this method, if you need to tinker with camera
        position and so forth.
        """
        pass

    # ------------------------------------------------------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, )
        assert qvel.shape == (self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer = None
            return

        if mode == 'rgb_array':
            self.viewer_setup()
            return _read_pixels(self.sim, *self.buffer_size)
        elif mode == 'human':
            self._get_viewer().render()

    def _get_viewer(self, mode='human'):
        if self.viewer is None and mode == 'human':
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])
Example #43
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--robot_file',
                        default='../../xml/simrobot/7dof/robot_mocap.xml',
                        type=str,
                        help='path to robot config')
    parser.add_argument('--save_dir',
                        default='../../xml/gen_xmls/simrobot/reacher',
                        type=str,
                        help='path to save initial joint poses')
    print('Program starts at: \033[92m %s \033[0m' %
          datetime.now().strftime("%Y-%m-%d %H:%M"))
    args = parser.parse_args()
    np.random.seed(args.seed)
    filename = 'poses.json'
    robot_file = args.robot_file
    save_dir = args.save_dir

    ini_bds = np.array([[0.3, 0.6], [-0.2, 0.2], [0.5, 0.7]])
    tgt_bds = np.array([[0.3, 0.6], [-0.3, 0.3], [-0.1, 0.3]])

    initial_num = [4, 5, 3]
    target_num = [4, 7, 5]
    target_state = np.mgrid[tgt_bds[0][0]:tgt_bds[0][1]:complex(target_num[0]),
                            tgt_bds[1][0]:tgt_bds[1][1]:complex(target_num[1]),
                            tgt_bds[2][0]:tgt_bds[2][1]:complex(target_num[2])]
    target_state = target_state.reshape(3, -1).T
    initial_state = np.mgrid[
        ini_bds[0][0]:ini_bds[0][1]:complex(initial_num[0]),
        ini_bds[1][0]:ini_bds[1][1]:complex(initial_num[1]),
        ini_bds[2][0]:ini_bds[2][1]:complex(initial_num[2])]
    initial_state = initial_state.reshape(3, -1).T

    np.random.shuffle(target_state)
    np.random.shuffle(initial_state)

    assert os.path.exists(robot_file)
    model = load_model_from_path(robot_file)
    sim = MjSim(model, nsubsteps=40)
    sim.reset()
    sim.step()
    site_xpos_cur = sim.data.site_xpos[0]
    print('site xpos:', site_xpos_cur)
    init_joint_angles = []
    goal_poses = []
    for idx in tqdm(range(initial_state.shape[0])):
        sim.reset()
        sim.step()
        site_xpos_target = initial_state[idx]
        delta = site_xpos_target - site_xpos_cur
        sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta
        for i in range(30):
            sim.step()
            dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target)
            if dist < 0.002:
                joint_angle = sim.data.qpos[:7]
                init_joint_angles.append(joint_angle.tolist())
                break

    for idx in tqdm(range(target_state.shape[0])):
        sim.reset()
        sim.step()
        site_xpos_target = target_state[idx]
        delta = site_xpos_target - site_xpos_cur
        sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta
        for i in range(30):
            sim.step()
            dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target)
            if dist < 0.002:
                goal_poses.append(site_xpos_target.tolist())
                break
    print('valid initial pose num: ', len(init_joint_angles))
    print('valid goal pose num: ', len(goal_poses))
    data = {'initial_joint_angles': init_joint_angles, 'ee_goal': goal_poses}
    os.makedirs(save_dir, exist_ok=True)
    save_file = os.path.join(save_dir, filename)
    print('saving to: ', save_file)
    with open(save_file, 'w') as f:
        json.dump(data, f, indent=2)

    print('Program ends at: \033[92m %s \033[0m' %
          datetime.now().strftime("%Y-%m-%d %H:%M"))
class Environment():
    def __init__(self,
                 model_name,
                 goal_space_train,
                 goal_space_test,
                 project_state_to_end_goal,
                 end_goal_thresholds,
                 initial_state_space,
                 subgoal_bounds,
                 project_state_to_subgoal,
                 subgoal_thresholds,
                 max_actions=1200,
                 num_frames_skip=10,
                 show=False):

        self.name = model_name

        # Create Mujoco Simulation
        self.model = load_model_from_path("./mujoco_files/" + model_name)
        self.sim = MjSim(self.model)

        # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks
        if model_name == "pendulum.xml":
            self.state_dim = 2 * len(self.sim.data.qpos) + len(
                self.sim.data.qvel)
        else:
            self.state_dim = len(self.sim.data.qpos) + len(
                self.sim.data.qvel
            )  # State will include (i) joint angles and (ii) joint velocities
        self.action_dim = len(
            self.sim.model.actuator_ctrlrange)  # low-level action dim
        self.action_bounds = self.sim.model.actuator_ctrlrange[:,
                                                               1]  # low-level action bounds
        self.action_offset = np.zeros((len(
            self.action_bounds)))  # Assumes symmetric low-level action ranges
        self.end_goal_dim = len(goal_space_test)
        self.subgoal_dim = len(subgoal_bounds)
        self.subgoal_bounds = subgoal_bounds

        # Projection functions
        self.project_state_to_end_goal = project_state_to_end_goal
        self.project_state_to_subgoal = project_state_to_subgoal

        # Convert subgoal bounds to symmetric bounds and offset.  Need these to properly configure subgoal actor networks
        self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds)))
        self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds)))

        for i in range(len(self.subgoal_bounds)):
            self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] -
                                                self.subgoal_bounds[i][0]) / 2
            self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][
                1] - self.subgoal_bounds_symmetric[i]

        # End goal/subgoal thresholds
        self.end_goal_thresholds = end_goal_thresholds
        self.subgoal_thresholds = subgoal_thresholds

        # Set inital state and goal state spaces
        self.initial_state_space = initial_state_space
        self.goal_space_train = goal_space_train
        self.goal_space_test = goal_space_test
        self.subgoal_colors = [
            "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon",
            "Gray", "White", "Black"
        ]

        self.max_actions = max_actions

        # Implement visualization if necessary
        self.visualize = show  # Visualization boolean
        if self.visualize:
            self.viewer = MjViewer(self.sim)
        self.num_frames_skip = num_frames_skip

    # Get state, which concatenates joint positions and velocities
    def get_state(self):

        if self.name == "pendulum.xml":
            return np.concatenate([
                np.cos(self.sim.data.qpos),
                np.sin(self.sim.data.qpos), self.sim.data.qvel
            ])
        else:
            return np.concatenate((self.sim.data.qpos, self.sim.data.qvel))

    # Reset simulation to state within initial state specified by user
    def reset_sim(self, next_goal=None):

        # Reset controls
        self.sim.data.ctrl[:] = 0

        if self.name == "ant_reacher.xml":
            while True:
                # Reset joint positions and velocities
                for i in range(len(self.sim.data.qpos)):
                    self.sim.data.qpos[i] = np.random.uniform(
                        self.initial_state_space[i][0],
                        self.initial_state_space[i][1])

                for i in range(len(self.sim.data.qvel)):
                    self.sim.data.qvel[i] = np.random.uniform(
                        self.initial_state_space[len(self.sim.data.qpos) +
                                                 i][0],
                        self.initial_state_space[len(self.sim.data.qpos) +
                                                 i][1])

                # Ensure initial ant position is more than min_dist away from goal
                min_dist = 8
                if np.linalg.norm(next_goal[:2] -
                                  self.sim.data.qpos[:2]) > min_dist:
                    break

        elif self.name == "ant_four_rooms.xml":

            # Choose initial start state to be different than room containing the end goal

            # Determine which of four rooms contains goal
            goal_room = 0

            if next_goal[0] < 0 and next_goal[1] > 0:
                goal_room = 1
            elif next_goal[0] < 0 and next_goal[1] < 0:
                goal_room = 2
            elif next_goal[0] > 0 and next_goal[1] < 0:
                goal_room = 3

            # Place ant in room different than room containing goal
            # initial_room = (goal_room + 2) % 4

            initial_room = np.random.randint(0, 4)
            while initial_room == goal_room:
                initial_room = np.random.randint(0, 4)

            # Set initial joint positions and velocities
            for i in range(len(self.sim.data.qpos)):
                self.sim.data.qpos[i] = np.random.uniform(
                    self.initial_state_space[i][0],
                    self.initial_state_space[i][1])

            for i in range(len(self.sim.data.qvel)):
                self.sim.data.qvel[i] = np.random.uniform(
                    self.initial_state_space[len(self.sim.data.qpos) + i][0],
                    self.initial_state_space[len(self.sim.data.qpos) + i][1])

            # Move ant to correct room
            self.sim.data.qpos[0] = np.random.uniform(3, 6.5)
            self.sim.data.qpos[1] = np.random.uniform(3, 6.5)

            # If goal should be in top left quadrant
            if initial_room == 1:
                self.sim.data.qpos[0] *= -1

            # Else if goal should be in bottom left quadrant
            elif initial_room == 2:
                self.sim.data.qpos[0] *= -1
                self.sim.data.qpos[1] *= -1

            # Else if goal should be in bottom right quadrant
            elif initial_room == 3:
                self.sim.data.qpos[1] *= -1

            # print("Goal Room: %d" % goal_room)
            # print("Initial Ant Room: %d" % initial_room)

        else:

            # Reset joint positions and velocities
            for i in range(len(self.sim.data.qpos)):
                self.sim.data.qpos[i] = np.random.uniform(
                    self.initial_state_space[i][0],
                    self.initial_state_space[i][1])

            for i in range(len(self.sim.data.qvel)):
                self.sim.data.qvel[i] = np.random.uniform(
                    self.initial_state_space[len(self.sim.data.qpos) + i][0],
                    self.initial_state_space[len(self.sim.data.qpos) + i][1])

        self.sim.step()

        # Return state
        return self.get_state()

    # Execute low-level action for number of frames specified by num_frames_skip
    def execute_action(self, action):

        self.sim.data.ctrl[:] = action
        for _ in range(self.num_frames_skip):
            self.sim.step()
            if self.visualize:
                self.viewer.render()

        return self.get_state()

    # Visualize end goal.  This function may need to be adjusted for new environments.
    def display_end_goal(self, end_goal):

        # Goal can be visualized by changing the location of the relevant site object.
        if self.name == "pendulum.xml":
            self.sim.data.mocap_pos[0] = np.array([
                0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6
            ])
        elif self.name == "ur5.xml":

            theta_1 = end_goal[0]
            theta_2 = end_goal[1]
            theta_3 = end_goal[2]

            # shoulder_pos_1 = np.array([0,0,0,1])
            upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
            forearm_pos_3 = np.array([0.425, 0, 0, 1])
            wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

            # Transformation matrix from shoulder to base reference frame
            T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159],
                              [0, 0, 0, 1]])

            # Transformation matrix from upper arm to shoulder reference frame
            T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                              [np.sin(theta_1),
                               np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                              [0, 0, 0, 1]])

            # Transformation matrix from forearm to upper arm reference frame
            T_3_2 = np.array([[np.cos(theta_2), 0,
                               np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                              [-np.sin(theta_2), 0,
                               np.cos(theta_2), 0], [0, 0, 0, 1]])

            # Transformation matrix from wrist 1 to forearm reference frame
            T_4_3 = np.array([[np.cos(theta_3), 0,
                               np.sin(theta_3), 0.425], [0, 1, 0, 0],
                              [-np.sin(theta_3), 0,
                               np.cos(theta_3), 0], [0, 0, 0, 1]])

            # Determine joint position relative to original reference frame
            # shoulder_pos = T_1_0.dot(shoulder_pos_1)
            upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
            forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
            wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                wrist_1_pos_4)[:3]

            joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
            """
            print("\nEnd Goal Joint Pos: ")
            print("Upper Arm Pos: ", joint_pos[0])
            print("Forearm Pos: ", joint_pos[1])
            print("Wrist Pos: ", joint_pos[2])
            """

            for i in range(3):
                self.sim.data.mocap_pos[i] = joint_pos[i]

        elif self.name == "ant_reacher.xml" or self.name == "ant_four_rooms.xml":
            self.sim.data.mocap_pos[0][:3] = np.copy(end_goal[:3])

        else:
            assert False, "Provide display end goal function in environment.py file"

    # Function returns an end goal
    def get_next_goal(self, test):

        end_goal = np.zeros((len(self.goal_space_test)))

        if self.name == "ur5.xml":

            goal_possible = False
            while not goal_possible:
                end_goal = np.zeros(shape=(self.end_goal_dim, ))
                end_goal[0] = np.random.uniform(self.goal_space_test[0][0],
                                                self.goal_space_test[0][1])

                end_goal[1] = np.random.uniform(self.goal_space_test[1][0],
                                                self.goal_space_test[1][1])
                end_goal[2] = np.random.uniform(self.goal_space_test[2][0],
                                                self.goal_space_test[2][1])

                # Next need to ensure chosen joint angles result in achievable task (i.e., desired end effector position is above ground)

                theta_1 = end_goal[0]
                theta_2 = end_goal[1]
                theta_3 = end_goal[2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                # Make sure wrist 1 pos is above ground so can actually be reached
                if np.absolute(end_goal[0]) > np.pi / 4 and forearm_pos[
                        2] > 0.05 and wrist_1_pos[2] > 0.15:
                    goal_possible = True

        elif self.name == "ant_four_rooms.xml":

            # Randomly select one of the four rooms in which the goal will be located
            room_num = np.random.randint(0, 4)

            # Pick exact goal location
            end_goal[0] = np.random.uniform(3, 6.5)
            end_goal[1] = np.random.uniform(3, 6.5)
            end_goal[2] = np.random.uniform(0.45, 0.55)

            # If goal should be in top left quadrant
            if room_num == 1:
                end_goal[0] *= -1

            # Else if goal should be in bottom left quadrant
            elif room_num == 2:
                end_goal[0] *= -1
                end_goal[1] *= -1

            # Else if goal should be in bottom right quadrant
            elif room_num == 3:
                end_goal[1] *= -1

        elif not test and self.goal_space_train is not None:
            for i in range(len(self.goal_space_train)):
                end_goal[i] = np.random.uniform(self.goal_space_train[i][0],
                                                self.goal_space_train[i][1])
        else:
            assert self.goal_space_test is not None, "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file"

            for i in range(len(self.goal_space_test)):
                end_goal[i] = np.random.uniform(self.goal_space_test[i][0],
                                                self.goal_space_test[i][1])

        # Visualize End Goal
        self.display_end_goal(end_goal)

        return end_goal

    # Visualize all subgoals
    def display_subgoals(self, subgoals):

        # Display up to 10 subgoals and end goal
        if len(subgoals) <= 11:
            subgoal_ind = 0
        else:
            subgoal_ind = len(subgoals) - 11

        for i in range(1, min(len(subgoals), 11)):
            if self.name == "pendulum.xml":
                self.sim.data.mocap_pos[i] = np.array([
                    0.5 * np.sin(subgoals[subgoal_ind][0]), 0,
                    0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6
                ])
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1

            elif self.name == "ur5.xml":

                theta_1 = subgoals[subgoal_ind][0]
                theta_2 = subgoals[subgoal_ind][1]
                theta_3 = subgoals[subgoal_ind][2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                # Determine joint position relative to original reference frame
                # shoulder_pos = T_1_0.dot(shoulder_pos_1)
                upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
                """
                print("\nSubgoal %d Joint Pos: " % i)
                print("Upper Arm Pos: ", joint_pos[0])
                print("Forearm Pos: ", joint_pos[1])
                print("Wrist Pos: ", joint_pos[2])
                """

                # Designate site position for upper arm, forearm and wrist
                for j in range(3):
                    self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy(
                        joint_pos[j])
                    self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1

                # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3])

                subgoal_ind += 1

            elif self.name == "ant_reacher.xml" or self.name == "ant_four_rooms.xml":
                self.sim.data.mocap_pos[i][:3] = np.copy(
                    subgoals[subgoal_ind][:3])
                self.sim.model.site_rgba[i][3] = 1

                subgoal_ind += 1

            else:
                # Visualize desired gripper position, which is elements 18-21 in subgoal vector
                self.sim.data.mocap_pos[i] = subgoals[subgoal_ind]
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
    def train_POMDP(self):
        args = self.args
        ROOT_DIR = os.path.dirname(os.path.dirname(
            os.path.abspath(__file__)))  # corl2019
        PARENT_DIR = os.path.dirname(ROOT_DIR)  # reserach
        # Create the output directory if it does not exist
        output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp',
                                  args.output_dir)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        # write args to file
        with open(os.path.join(output_dir, 'args.txt'), 'w+') as f:
            json.dump(args.__dict__, f, indent=2)
        f.close()

        # Create our policy net and a target net
        self.policy_net_1 = DRQN(args.indim, args.outdim).to(args.device)
        self.policy_net_2 = DRQN(args.indim, args.outdim).to(args.device)
        self.policy_net_3 = DRQN(args.indim, args.outdim).to(args.device)

        # Set up the optimizer
        self.optimizer_1 = optim.RMSprop(self.policy_net_1.parameters())
        self.optimizer_2 = optim.RMSprop(self.policy_net_2.parameters())
        self.optimizer_3 = optim.RMSprop(self.policy_net_3.parameters())
        self.memory = RecurrentMemory(800)
        self.steps_done = 0

        # Setup the state normalizer
        normalizer = Multimodal_Normalizer(num_inputs=args.indim,
                                           device=args.device)

        print_variables = {'durations': [], 'rewards': [], 'loss': []}
        start_episode = 0
        if args.checkpoint_file:
            if os.path.exists(args.checkpoint_file):
                checkpoint = torch.load(args.checkpoint_file)
                self.policy_net_1.load_state_dict(checkpoint['policy_net_1'])
                self.policy_net_2.load_state_dict(checkpoint['policy_net_2'])
                self.policy_net_3.load_state_dict(checkpoint['policy_net_3'])
                self.optimizer_1.load_state_dict(checkpoint['optimizer_1'])
                self.optimizer_2.load_state_dict(checkpoint['optimizer_2'])
                self.optimizer_3.load_state_dict(checkpoint['optimizer_3'])
                start_episode = checkpoint['epochs']
                self.steps_done = checkpoint['steps_done']
                with open(
                        os.path.join(os.path.dirname(args.checkpoint_file),
                                     'results_pomdp.pkl'), 'rb') as file:
                    plot_dict = pickle.load(file)
                    print_variables['durations'] = plot_dict['durations']
                    print_variables['rewards'] = plot_dict['rewards']

        if args.normalizer_file:
            if os.path.exists(args.normalizer_file):
                normalizer.restore_state(args.normalizer_file)

        if args.memory:
            if os.path.exists(args.memory):
                self.memory.load(args.memory)

        action_space = ActionSpace(dp=0.06, df=10)

        # Create robot, reset simulation and grasp handle
        model = load_model_from_path(args.model_path)
        sim = MjSim(model)
        sim_param = SimParameter(sim)
        sim.step()
        if args.render:
            viewer = MjViewer(sim)
        else:
            viewer = None

        robot = RobotSim(sim, viewer, sim_param, args.render,
                         self.break_threshold)

        # Main training loop
        for ii in range(start_episode, args.epochs):
            start_time = time.time()
            self.steps_done += 1
            act_sequence = []
            if args.sim:
                sim_params = init_model(robot.mj_sim)
                robot.reset_simulation()
                ret = robot.grasp_handle()
                if not ret:
                    continue

                # Local memory for current episode
                localMemory = []

                # Get current observation
                hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states(
                    batch_size=1, device=args.device)
                hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states(
                    batch_size=1, device=args.device)
                hidden_state_3, cell_state_3 = self.policy_net_3.init_hidden_states(
                    batch_size=1, device=args.device)
                observation_space = TactileObs(
                    robot.get_gripper_xpos(),  # 24
                    robot.get_all_touch_buffer(args.hap_sample))  # 30 x 7
                broken_so_far = 0

            for t in count():
                if not args.quiet and t % 50 == 0:
                    print("Running training episode: {}, iteration: {}".format(
                        ii, t))

                # Select action
                observation = observation_space.get_state()
                if args.position:
                    observation = observation[6:]
                if args.shear:
                    indices = np.ones(len(observation), dtype=bool)
                    indices[6:166] = False
                    observation = observation[indices]
                if args.force:
                    observation = observation[:166]
                normalizer.observe(observation)
                observation = normalizer.normalize(observation)
                action, hidden_state_1, cell_state_1 = self.select_action(
                    observation, hidden_state_1, cell_state_1)

                # record actions in this epoch
                act_sequence.append(action)

                # Perform action
                delta = action_space.get_action(
                    self.ACTIONS[action])['delta'][:3]
                target_position = np.add(robot.get_gripper_jpos()[:3],
                                         np.array(delta))
                target_pose = np.hstack(
                    (target_position, robot.get_gripper_jpos()[3:]))

                if args.sim:
                    robot.move_joint(target_pose,
                                     True,
                                     self.gripping_force,
                                     hap_sample=args.hap_sample)

                    # Get reward
                    done, num = robot.update_tendons()
                    failure = robot.check_slippage()
                    if num > broken_so_far:
                        reward = num - broken_so_far
                        broken_so_far = num
                    else:
                        reward = 0

                    # # Add a movement reward
                    # reward -= 0.05 * np.linalg.norm(target_position - robot.get_gripper_jpos()[:3]) / np.linalg.norm(delta)

                    # Observe new state
                    observation_space.update(
                        robot.get_gripper_xpos(),  # 24
                        robot.get_all_touch_buffer(args.hap_sample))  # 30x7

                # Set max number of iterations
                if t >= self.max_iter:
                    done = True

                # Check if done
                if not done and not failure:
                    next_state = observation_space.get_state()
                    if args.position:
                        next_state = next_state[6:]
                    if args.shear:
                        indices = np.ones(len(next_state), dtype=bool)
                        indices[6:166] = False
                        next_state = next_state[indices]
                    if args.force:
                        next_state = next_state[:166]
                    normalizer.observe(next_state)
                    next_state = normalizer.normalize(next_state)
                else:
                    next_state = None

                # Push new Transition into memory
                localMemory.append(
                    Transition(observation, action, next_state, reward))

                # Optimize the model
                if t % 10 == 0:
                    loss = self.optimize_model()
        #        if loss:
        #            print_variables['loss'].append(loss.item())

        # If we are done, reset the model
                if done or failure:
                    self.memory.push(localMemory)
                    if failure:
                        print_variables['durations'].append(self.max_iter)
                    else:
                        print_variables['durations'].append(t)
                    print_variables['rewards'].append(broken_so_far)
                    plot_variables(self.figure, print_variables,
                                   "Training POMDP")
                    print("Model parameters: {}".format(sim_params))
                    print("Actions in this epoch are: {}".format(act_sequence))
                    print("Epoch {} took {}s, total number broken: {}\n\n".
                          format(ii,
                                 time.time() - start_time, broken_so_far))

                    break

            # Save checkpoints every vew iterations
            if ii % args.save_freq == 0:
                save_path = os.path.join(
                    output_dir, 'checkpoint_model_' + str(ii) + '.pth')
                torch.save(
                    {
                        'epochs': ii,
                        'steps_done': self.steps_done,
                        'policy_net_1': self.policy_net_1.state_dict(),
                        'policy_net_2': self.policy_net_2.state_dict(),
                        'policy_net_3': self.policy_net_3.state_dict(),
                        'optimizer_1': self.optimizer_1.state_dict(),
                        'optimizer_2': self.optimizer_2.state_dict(),
                        'optimizer_3': self.optimizer_3.state_dict(),
                    }, save_path)

            self.memory.save_memory(os.path.join(output_dir, 'memory.pickle'))

        if args.savefig_path:
            now = dt.datetime.now()
            self.figure[0].savefig(
                args.savefig_path +
                '{}_{}_{}.png'.format(now.month, now.day, now.hour),
                format='png')

        print('Training done')
        plt.show()
        return print_variables
 def get_sim(seed):
     model = load_model_from_mjb(pattern)
     return MjSim(model)
Example #47
0
class BaseEnv:
    def __init__(
        self,
        robot_folders,
        robot_dir,
        substeps,
        tol=0.02,
        train=True,
        with_kin=None,
        with_dyn=None,
        multi_goal=False,
    ):
        self.with_kin = with_kin
        self.with_dyn = with_dyn
        self.multi_goal = multi_goal
        self.goal_dim = 3

        if self.with_dyn:
            norm_file = os.path.join(robot_dir, 'stats/dyn_stats.json')
            with open(norm_file, 'r') as f:
                stats = json.load(f)
            self.dyn_mu = np.array(stats['mu']).reshape(-1)
            self.dyn_sigma = np.array(stats['sigma']).reshape(-1)
            self.dyn_min = np.array(stats['min']).reshape(-1)
            self.dyn_max = np.array(stats['max']).reshape(-1)

        self.nsubsteps = substeps
        self.metadata = {}
        self.reward_range = (-np.inf, np.inf)
        self.spec = None
        self.dist_tol = tol
        self.viewer = None

        self.links = [
            'gl0', 'gl1_1', 'gl1_2', 'gl2', 'gl3_1', 'gl3_2', 'gl4', 'gl5_1',
            'gl5_2', 'gl6'
        ]
        self.bodies = [
            "connector_plate_base", "electric_gripper_base",
            "gripper_l_finger", "gripper_l_finger_tip", "gripper_r_finger",
            "gripper_r_finger_tip", "l0", "l1", "l2", "l3", "l4", "l5", "l6"
        ]
        self.site_set = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6']
        self.joint_set = self.site_set

        self.robots = []
        for folder in robot_folders:
            self.robots.append(os.path.join(robot_dir, folder))

        self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)}
        self.robot_num = len(self.robots)

        if train:
            self.test_robot_num = min(100, self.robot_num)
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(
                range(self.train_robot_num, self.robot_num))
            self.train_test_robot_num = min(100, self.train_robot_num)
            self.train_test_robot_ids = list(range(self.train_test_robot_num))
            self.train_test_conditions = self.train_test_robot_num
        else:
            self.test_robot_num = self.robot_num
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(range(self.robot_num))

        self.test_conditions = self.test_robot_num

        print('Train robots: ', self.train_robot_num)
        print('Test robots: ', self.test_robot_num)
        print('Multi goal:', self.multi_goal)
        self.reset_robot(0)

        self.ob_dim = self.get_obs()[0].size
        print('Ob dim:', self.ob_dim)

        high = np.inf * np.ones(self.ob_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.ep_reward = 0
        self.ep_len = 0

    def reset(self, robot_id=None):
        raise NotImplementedError

    def step(self, action):
        raise NotImplementedError

    def update_action_space(self):
        actuators = self.sim.model._actuator_name2id.keys()
        valid_joints = [ac for ac in actuators if ac in self.joint_set]
        self.act_dim = len(valid_joints)
        bounds = self.sim.model.actuator_ctrlrange[:self.act_dim]
        self.ctrl_low = np.copy(bounds[:, 0])
        self.ctrl_high = np.copy(bounds[:, 1])
        self.action_space = spaces.Box(self.ctrl_low,
                                       self.ctrl_high,
                                       dtype=np.float32)

    def scale_action(self, action):
        act_k = (self.action_space.high - self.action_space.low) / 2.
        act_b = (self.action_space.high + self.action_space.low) / 2.
        return act_k * action + act_b

    def reset_robot(self, robot_id):
        self.robot_folder_id = self.dir2id[self.robots[robot_id]]
        robot_file = os.path.join(self.robots[robot_id], 'robot.xml')
        self.model = load_model_from_path(robot_file)
        self.sim = MjSim(self.model, nsubsteps=self.nsubsteps)
        self.update_action_space()
        self.sim.reset()
        self.sim.step()
        if self.viewer is not None:
            self.viewer = MjViewer(self.sim)

    def test_reset(self, cond):
        robot_id = self.test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def train_test_reset(self, cond):
        robot_id = self.train_test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def cal_reward(self, s, goal, a):
        dist = np.linalg.norm(s - goal)

        if dist < self.dist_tol:
            done = True
            reward_dist = 1
        else:
            done = False
            reward_dist = -1
        reward = reward_dist
        reward -= 0.1 * np.square(a).sum()
        return reward, dist, done

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()

    def get_obs(self):
        qpos = self.get_qpos(self.sim)
        qvel = self.get_qvel(self.sim)

        ob = np.concatenate([qpos, qvel])
        if self.with_kin:
            link_rela = self.get_xpos_xrot(self.sim)
            ob = np.concatenate([ob, link_rela])
        if self.with_dyn:
            body_mass = self.get_body_mass(self.sim)
            joint_friction = self.get_friction(self.sim)
            joint_damping = self.get_damping(self.sim)
            armature = self.get_armature(self.sim)
            dyn_vec = np.concatenate(
                (body_mass, joint_friction, joint_damping, armature))
            dyn_vec = np.divide((dyn_vec - self.dyn_min),
                                self.dyn_max - self.dyn_min)
            ob = np.concatenate([ob, dyn_vec])
        target_pos = self.sim.data.get_site_xpos('target').flatten()
        ob = np.concatenate([ob, target_pos])
        achieved_goal = self.sim.data.get_site_xpos('ref_pt')
        return ob, achieved_goal

    def get_link_length(self, sim):
        link_length = []
        for link in self.links:
            geom_id = sim.model._geom_name2id[link]
            link_length.append(sim.model.geom_size[geom_id][1])
        return np.asarray(link_length).reshape(-1)

    def get_qpos(self, sim):
        angle_noise_range = 0.02
        qpos = sim.data.qpos[:self.act_dim]
        qpos += np.random.uniform(-angle_noise_range, angle_noise_range,
                                  self.act_dim)
        qpos = np.pad(qpos, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qpos.reshape(-1)

    def get_qvel(self, sim):
        velocity_noise_range = 0.02
        qvel = sim.data.qvel[:self.act_dim]
        qvel += np.random.uniform(-velocity_noise_range, velocity_noise_range,
                                  self.act_dim)
        qvel = np.pad(qvel, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qvel.reshape(-1)

    def get_xpos_xrot(self, sim):
        xpos = []
        xrot = []
        for joint_id in range(self.act_dim):
            joint = sim.model._actuator_id2name[joint_id]
            if joint == 'j0':
                pos1 = sim.data.get_body_xpos('base_link')
                mat1 = sim.data.get_body_xmat('base_link')
            else:
                prev_id = joint_id - 1
                prev_joint = sim.model._actuator_id2name[prev_id]
                pos1 = sim.data.get_site_xpos(prev_joint)
                mat1 = sim.data.get_site_xmat(prev_joint)
            pos2 = sim.data.get_site_xpos(joint)
            mat2 = sim.data.get_site_xmat(joint)
            relative_pos = pos2 - pos1
            rot_euler = self.relative_rotation(mat1, mat2)
            xpos.append(relative_pos)
            xrot.append(rot_euler)
        xpos = np.array(xpos).flatten()
        xrot = np.array(xrot).flatten()
        xpos = np.pad(xpos, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        xrot = np.pad(xrot, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        ref_pt_xpos = self.sim.data.get_site_xpos('ref_pt')
        ref_pt_xmat = self.sim.data.get_site_xmat('ref_pt')
        relative_pos = ref_pt_xpos - pos2
        rot_euler = self.relative_rotation(mat2, ref_pt_xmat)
        xpos = np.concatenate((xpos, relative_pos.flatten()))
        xrot = np.concatenate((xrot, rot_euler.flatten()))
        pos_rot = np.concatenate((xpos, xrot))
        return pos_rot

    def get_damping(self, sim):
        damping = sim.model.dof_damping[:self.act_dim]
        damping = np.pad(damping, (0, 7 - self.act_dim),
                         mode='constant',
                         constant_values=0)
        return damping.reshape(-1)

    def get_friction(self, sim):
        friction = sim.model.dof_frictionloss[:self.act_dim]
        friction = np.pad(friction, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return friction.reshape(-1)

    def get_body_mass(self, sim):
        body_mass = []
        for body in self.bodies:
            body_id = sim.model._body_name2id[body]
            body_mass.append(sim.model.body_mass[body_id])
        return np.asarray(body_mass).reshape(-1)

    def get_armature(self, sim):
        armature = sim.model.dof_armature[:self.act_dim]
        armature = np.pad(armature, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return armature.reshape(-1)

    def relative_rotation(self, mat1, mat2):
        # return the euler x,y,z of the relative rotation
        # (w.r.t site1 coordinate system) from site2 to site1
        rela_mat = np.dot(np.linalg.inv(mat1), mat2)
        return rotations.mat2euler(rela_mat)

    def close(self):
        pass
Example #48
0
#This script renders image from xml model into png
# To run this script mujoco and mujoco-py must be installed
# Run: python3 save_rgb.py <model_with_path> <name_of_used_camera> <output_image_with_path>
import sys
from matplotlib import pyplot as plt
from mujoco_py import load_model_from_path, MjSim, functions
import numpy as np
import time

model_path = sys.argv[1]
camera_name = sys.argv[2]
img_with_path = sys.argv[3]

model = load_model_from_path(model_path)
sim = MjSim(model)

img = sim.render(width=500, height=500, camera_name=camera_name)
#print(img)
plt.imshow(img, interpolation='nearest')
plt.savefig(img_with_path)
#plt.show()
Example #49
0
def test_jacobians():
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="1.0"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
                    <geom name="geom2" pos="1 1 1" size="0.5"/>
                    <site name="target" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in bad array (long instead of double)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in bad array
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.long)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
 def get_sim(seed):
     model = load_model_from_path_fix_paths(xml_path=pattern)
     return MjSim(model)
Example #51
0
 def __init__(self):
     #super(lab_env, self).__init__(env)
     # The real-world simulator
     self.model = load_model_from_path(xml_path)
     self.sim = MjSim(self.model)
     self.viewer = MjViewer(self.sim)
Example #52
0
with open('real.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        calculated_angle_list.append(float(row[0]) - 90)

read_index = np.arange(0, 700)

import argparse

parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--render', help='render', type=bool, default=False)
args = parser.parse_args()

model = load_model_from_path("test_servo.xml")
sim = MjSim(model)

joints = ['servo_1']
joints_idx = list(map(lambda x: sim.model.joint_name2id(x), joints))
qpos = sim.data.qpos
joints_pos = qpos[joints_idx]

# viewer = MjViewer(sim)

sim_state = sim.get_state()

target_position_list = []
sim_position_list = []

i = 0
Example #53
0
def test_arrays_of_objs():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    renderer = cymj.MjRenderContext(sim, offscreen=True)
    assert len(renderer.scn.camera) == 2, "Expecting scn.camera to be available"
Example #54
0
        </body>
    </worldbody>
    <actuator>
        <position joint="j" kp="2000"/>
    </actuator>
</mujoco>
"""

fn = '''
    #define SQUARE(a) (a * a)
    void fun(const mjModel* m, mjData* d) {
        for (int i = d->ne; i < d->nefc; i++) {
            pos_sum += SQUARE(d->efc_pos[i]);
        }
    }
'''

sim = MjSim(load_model_from_xml(MODEL_XML), nsubsteps=50,
            substep_callback=fn, userdata_names=['pos_sum'])
t = 0
while t < 10:
    t += 1
    sim.data.ctrl[0] = .2
    print('t', t)
    sim.step()
    # verify that there are no contacts visible between steps
    assert sim.data.ncon == 0, 'No contacts should be detected here'
    # verify that contacts (and penetrations) are visible to substep_callback
    if t > 1:
        assert sim.data.get_userdata('pos_sum') > 0  # detected collision
class PointEnvMDP(MDP):
    def __init__(self,
                 init_mean=(-0.2, -0.2),
                 control_cost=False,
                 dense_reward=False,
                 render=False):
        xml = os.path.join(
            os.path.expanduser("~"),
            "git-repos/dm_control/dm_control/suite/point_mass.xml")
        model = load_model_from_path(xml)
        self.sim = MjSim(model)
        self.render = render
        self.init_mean = init_mean
        self.control_cost = control_cost
        self.dense_reward = dense_reward

        if self.render: self.viewer = MjViewer(self.sim)

        # Config
        self.env_name = "Point-Mass-Environment"
        self.target_position = np.array([0., 0.])
        self.target_tolerance = 0.02
        self.init_noise = 0.05

        self._initialize_mujoco_state()
        self.init_state = self.get_state()

        print("Loaded {} with dense_reward={}".format(self.env_name,
                                                      self.dense_reward))

        MDP.__init__(self, [0, 1], self._transition_func, self._reward_func,
                     self.init_state)

    def _reward_func(self, state, action):
        self.next_state = self._step(action)
        if self.render: self.viewer.render()
        if self.dense_reward:
            reward = -np.linalg.norm(self.next_state.position -
                                     self.target_position)
        else:
            reward = +0. if self.next_state.is_terminal() else -1.
        control_cost = 0.1 * np.linalg.norm(action)
        if self.control_cost: reward = reward - control_cost
        return reward

    def _transition_func(self, state, action):
        return self.next_state

    def execute_agent_action(self, action, option_idx=None):
        if self.render and option_idx is not None:
            self.viewer.add_marker(pos=np.array([0, 0, 0.1]),
                                   size=0.001 * np.ones(3),
                                   label="Option {}".format(option_idx))
        reward, next_state = super(PointEnvMDP,
                                   self).execute_agent_action(action)
        return reward, next_state

    def is_goal_state(self, state):
        position = state.features()[:2] if isinstance(
            state, PointEnvState) else state[:2]
        return self.is_goal_position(position)

    def is_goal_position(self, position):
        distance = np.linalg.norm(position - self.target_position)
        return distance <= self.target_tolerance

    def get_state(self):

        # Individually indexing to prevent Mujoco from altering state history
        x, y = self.sim.data.qpos[0], self.sim.data.qpos[1]
        x_dot, y_dot = self.sim.data.qvel[0], self.sim.data.qvel[1]

        # Create State object from simulator state
        position = np.array([x, y])
        velocity = np.array([x_dot, y_dot])

        # State is terminal when it is the goal state
        done = self.is_goal_position(position)
        state = PointEnvState(position, velocity, done)

        return state

    def _step(self, action):
        self.sim.data.ctrl[:] = action
        self.sim.step()
        return self.get_state()

    def _initialize_mujoco_state(self):
        init_position = np.array(self.init_mean) + np.random.uniform(
            0., self.init_noise, 2)
        init_state = MjSimState(time=0.,
                                qpos=init_position,
                                qvel=np.array([0., 0.]),
                                act=None,
                                udd_state={})
        self.sim.set_state(init_state)

    @staticmethod
    def state_space_size():
        return 4

    @staticmethod
    def action_space_size():
        return 2

    @staticmethod
    def is_primitive_action(action):
        return -1. <= action.all() <= 1.

    def reset(self):
        self._initialize_mujoco_state()
        self.init_state = self.get_state()

        super(PointEnvMDP, self).reset()

    def __str__(self):
        return self.env_name
Example #56
0
class SimManager():
    """"""
    def __init__(self,
                 filepath,
                 blender_path=None,
                 visualize=False,
                 num_sims=1):
        self.filepath = filepath
        self.blender_path = blender_path
        self.visualize = visualize
        self.num_sims = num_sims

        self.base_model = load_model_from_path(filepath)

        if self.num_sims > 1:
            self.pool = MjRenderPool(self.base_model,
                                     n_workers=num_sims,
                                     modder=ArenaModder)
        else:
            self.sim = MjSim(self.base_model)
            self.arena_modder = ArenaModder(self.sim)
            if self.visualize:
                self.arena_modder.visualize = True
                self.viewer = MjViewer(self.sim)
                self.arena_modder.viewer = self.viewer
            else:
                self.viewer = None

    def forward(self):
        """
        Advances simulator a step (NECESSARY TO MAKE CAMERA AND LIGHT MODDING WORK)
        """
        if self.num_sims <= 1:
            self.sim.forward()
            if self.visualize:
                # Get angle of camera and display it
                quat = np.quaternion(*self.base_model.cam_quat[0])
                ypr = quaternion.as_euler_angles(quat) * 180 / np.pi
                cam_pos = self.base_model.cam_pos[0]
                #self.viewer.add_marker(pos=cam_pos, label="CAM: {}{}".format(cam_pos, ypr))
                self.viewer.add_marker(pos=cam_pos,
                                       label="CAM: {}".format(ypr))
                self.viewer.render()

    def _get_pool_data(self):
        IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001)

        cam_imgs, ground_truths = self.pool.render(640,
                                                   360,
                                                   camera_name='camera1',
                                                   randomize=True)
        ground_truths = list(ground_truths)
        cam_imgs = list(
            cam_imgs[:, ::-1, :, :])  # Rendered images are upside-down.

        for i in range(len(cam_imgs)):
            image_noise_variance = sample(IMAGE_NOISE_RVARIANCE)
            cam_imgs[i] = (skimage.util.random_noise(
                cam_imgs[i], mode='gaussian', var=image_noise_variance) *
                           255).astype(np.uint8)
            cam_imgs[i] = preproc_image(cam_imgs[i])

        return cam_imgs, ground_truths

    def _get_cam_frame(self, display=False, ground_truth=None):
        """Grab an image from the camera (224, 244, 3) to feed into CNN"""
        IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001)
        cam_img = self.sim.render(
            640, 360, camera_name='camera1'
        )[::-1, :, :]  # Rendered images are upside-down.
        image_noise_variance = sample(IMAGE_NOISE_RVARIANCE)
        cam_img = (skimage.util.random_noise(
            cam_img, mode='gaussian', var=image_noise_variance) * 255).astype(
                np.uint8)
        cam_img = preproc_image(cam_img)
        if display:
            label = str(ground_truth[3:6])
            display_image(cam_img, label)
        return cam_img

    def _get_ground_truth(self):
        return self.arena_modder.get_ground_truth()

    def get_data(self):
        if self.num_sims > 1:
            return self._get_pool_data()
        else:
            self.arena_modder.randomize()
            gt = self._get_ground_truth()
            self.sim.forward()
            cam = self._get_cam_frame()
            return cam, gt

    def randrocks(self):
        """Generate a new set of 3 random rock meshes using a Blender script"""
        if self.blender_path is None:
            raise Exception(
                'You must install Blender and include the path to its exectubale in the constructor to use this method'
            )

        import subprocess
        subprocess.call(
            [self.blender_path, "--background", "--python", "randrock.py"])
Example #57
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        try:
            observation, _reward, done, _info = self.step(
                np.zeros(self.model.nu))
        except NotImplementedError:
            observation, _reward, done, _info = self._step(
                np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high, dtype=np.float32)

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    def evaluate_success(self, paths, logger=None):
        """
        Log various success metrics calculated based on input paths into the logger
        """
        pass

    # -----------------------------

    def reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        old_state = self.sim.get_state()
        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
                                         old_state.act, old_state.udd_state)
        self.sim.set_state(new_state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def render(self, *args, **kwargs):
        pass
        #return self.mj_render()

    def _get_viewer(self):
        pass
        #return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
Example #58
0
            <joint axis="0 1 0" name="box:y" type="slide"/>
        </body>
        <body name="floor" pos="0 0 0.025">
            <geom size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/>
        </body>
    </worldbody>
</mujoco>
"""


def print_box_xpos(sim):
    print("box xpos:", sim.data.get_body_xpos("box"))


model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)

states = [{'box:x': +0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': -0.8},
          {'box:x': +0.8, 'box:y': -0.8},
          {'box:x': +0.0, 'box:y': +0.0}]

# MjModel.joint_name2id returns the index of a joint in
# MjData.qpos.
x_joint_i = sim.model.get_joint_qpos_addr("box:x")
y_joint_i = sim.model.get_joint_qpos_addr("box:y")

print_box_xpos(sim)
Example #59
0
def test_mj_sim_buffers():
    model = load_model_from_xml(BASIC_MODEL_XML)

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    assert(sim.udd_state == {})

    sim.step()
    assert(sim.udd_state == {})

    # test with callback
    foo = 10
    d = {"foo": foo,
         "foo_2": np.array([foo, foo])}

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    assert(sim.udd_state is not None)
    assert(sim.udd_state["foo"] == foo)
    assert(sim.udd_state["foo_2"].shape[0] == 2)
    assert(sim.udd_state["foo_2"][0] == foo)

    foo = 11
    d = {"foo": foo,
         "foo_2": np.array([foo, foo])}
    sim.step()
    assert(sim.udd_state is not None)
    assert(sim.udd_state["foo"] == foo)
    assert(sim.udd_state["foo_2"][0] == foo)

    d = {}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo,
         "foo_2": np.array([foo, foo]),
         "foo_3": foo}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo,
         "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": "haha",
         "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()
Example #60
0
def general_usage(xml_path, ee_list, N):
    # ==========================================================================
    # Quantaties
    # ==========================================================================
    ee_SE3_mujoco, ee_jac_mujoco, M_mujoco, qfrc_bias_mujoco = dict(), dict(
    ), [], []
    ee_SE3_tf_rbdl, ee_jac_tf_rbdl, M_tf_rbdl, qfrc_bias_tf_rbdl = dict(
    ), dict(), None, None
    for ee in ee_list:
        ee_SE3_mujoco[ee], ee_jac_mujoco[ee], ee_SE3_tf_rbdl[
            ee], ee_jac_tf_rbdl[ee] = [], [], None, None

    # ==========================================================================
    # Mujoco
    # ==========================================================================
    mujoco_model = load_model_from_path(xml_path)
    m = MjSim(mujoco_model)
    m.forward()
    sim_state = m.get_state()
    q = np.random.uniform(-1., 1., (N, m.model.nq))
    qdot = np.random.uniform(-1., 1., (N, m.model.nq))

    for i in tqdm(range(N)):
        for j in range(m.model.nq):
            sim_state.qpos[j] = q[i, j]
            sim_state.qvel[j] = qdot[i, j]
        m.set_state(sim_state)
        m.forward()
        for ee in ee_list:
            T_ee = np.eye(4)
            T_ee[0:3, 0:3] = m.data.get_body_xmat(ee)
            T_ee[0:3, 3] = m.data.get_body_xpos(ee)
            ee_SE3_mujoco[ee].append(T_ee)

            J_ee = np.zeros((6, m.model.nq))
            jacp = np.zeros(m.model.nv * 3)
            jacr = np.zeros(m.model.nv * 3)
            functions.mj_jac(m.model, m.data, jacp, jacr,
                             m.data.get_body_xpos(ee),
                             m.model.body_name2id(ee))
            J_ee[0:3, :] = jacr.reshape(3, m.model.nv)
            J_ee[3:6, :] = jacp.reshape(3, m.model.nv)
            ee_jac_mujoco[ee].append(J_ee)
        M_ = np.zeros(m.model.nv * m.model.nv)
        functions.mj_fullM(m.model, M_, m.data.qM)
        M_mujoco.append(M_.reshape(m.model.nq, m.model.nq))
        qfrc_bias_mujoco.append(np.copy(m.data.qfrc_bias))

    # ==========================================================================
    # tf_rbdl
    # ==========================================================================
    ic = initial_config_from_mjcf(xml_path, ee_list, verbose=True)
    for ee in ee_list:
        ee_SE3_tf_rbdl[ee] = fk(ic['init_ee_SE3'][ee], ic['Blist'][ee],
                                ic['Bidlist'][ee],
                                tf.convert_to_tensor(q, tf.float32))
        ee_local_jac_tf_rbdl = jacobian(ic['Blist'][ee], ic['Bidlist'][ee],
                                        tf.convert_to_tensor(q, tf.float32))
        R = ee_SE3_tf_rbdl[ee][:, 0:3, 0:3]
        RR = tf.concat([
            tf.concat([R, tf.zeros((N, 3, 3))], 2),
            tf.concat([tf.zeros((N, 3, 3)), R], 2)
        ], 1)
        ee_jac_tf_rbdl[ee] = tf.matmul(RR, ee_local_jac_tf_rbdl)
    M_tf_rbdl = mass_matrix(
        tf.convert_to_tensor(q, tf.float32), ic['pidlist'], ic['Mlist'],
        ic['Glist'], ic['Slist']) + tf.eye(m.model.nq) * ic['joint_armature']

    cori = coriolis_forces(tf.convert_to_tensor(q, tf.float32),
                           tf.convert_to_tensor(qdot,
                                                tf.float32), ic['pidlist'],
                           ic['Mlist'], ic['Glist'], ic['Slist'])
    grav = gravity_forces(tf.convert_to_tensor(q, tf.float32), ic['g'],
                          ic['pidlist'], ic['Mlist'], ic['Glist'], ic['Slist'])
    qfrc_bias_tf_rbdl = cori + grav

    # ==========================================================================
    # Comparing
    # ==========================================================================

    # print("="*80)
    # print("q")
    # print(q)
    # print("qdot")
    # print(qdot)
    # print("="*80)
    # for i in range(N):
    # for ee in ee_list:
    # print("-"*80)
    # print("{} SE3".format(ee))
    # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(), np.isclose(ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(),atol=1e-5)))
    # print("-"*80)
    # print("{} Jacobian".format(ee))
    # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(), np.isclose(ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(),atol=1e-5)))
    # print("-"*80)
    # print("Mass Matrix")
    # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(M_mujoco[i], M_tf_rbdl[i].numpy(), np.isclose(M_mujoco[i], M_tf_rbdl[i].numpy(),atol=1e-5)))
    # print("-"*80)
    # print("Bias frc")
    # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(), np.isclose(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(),atol=1e-5)))
    # print("="*80)

    for i in range(N):
        for ee in ee_list:
            assert np.allclose(
                ee_SE3_mujoco[ee][i],
                ee_SE3_tf_rbdl[ee][i].numpy(),
                atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(
                    ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(),
                    np.isclose(ee_SE3_mujoco[ee][i],
                               ee_SE3_tf_rbdl[ee][i].numpy()))
            # print("[{} SE3] PASSED]".format(ee))
            assert np.allclose(
                ee_jac_mujoco[ee][i],
                ee_jac_tf_rbdl[ee][i].numpy(),
                atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(
                    ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(),
                    np.isclose(ee_jac_mujoco[ee][i],
                               ee_jac_tf_rbdl[ee][i].numpy()))
            # print("[{} Jacobian] PASSED]".format(ee))
        assert np.allclose(
            M_mujoco[i], M_tf_rbdl[i].numpy(),
            atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(
                M_mujoco[i], M_tf_rbdl[i].numpy(),
                np.isclose(M_mujoco[i], M_tf_rbdl[i].numpy()))
        # print("[Mass Matrix] PASSED]")
        assert np.allclose(
            qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(),
            atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(
                qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(),
                np.isclose(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy()))
        # print("[Coriolis + Gravity] PASSED]")
    print("[PASSED]")