def test_viewer(): model = load_model_from_path("mujoco_py/tests/test.xml") sim = MjSim(model) viewer = MjViewer(sim) for _ in range(100): sim.step() viewer.render()
def test_multiple_sims(): # Ensure that creating new simulators still produces good renderings. xml = """ <mujoco> <asset> <texture name="t1" width="32" height="32" type="2d" builtin="flat" /> <material name="m1" texture="t1" /> </asset> <worldbody> <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" /> <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" /> <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" /> </worldbody> </mujoco> """ model = load_model_from_xml(xml) random_state = np.random.RandomState(0) for i in range(3): sim = MjSim(model) sim.forward() modder = TextureModder(sim, random_state=random_state) for j in range(2): modder.rand_checker('g1') compare_imgs( sim.render(201, 205, camera_name="topcam"), 'test_multiple_sims.loop%d_%d.png' % (i, j))
def test_data_attribute_getters(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() assert_array_equal(sim.data.get_body_xpos("body1"), [0, 0, 1]) with pytest.raises(ValueError): sim.data.get_body_xpos("body_foo") with pytest.raises(RuntimeError): sim.data.get_xpos("body1") assert len(sim.data.get_body_xquat("body1")) == 4 assert_array_equal(sim.data.get_body_xmat("body1").shape, (3, 3)) # At (0, 1, 1) since the geom is displaced in the body assert_array_equal(sim.data.get_body_xipos("body1"), [0, 1, 1]) assert_array_equal(sim.data.get_site_xpos("site1"), [1, 0, 1]) assert_array_equal(sim.data.get_site_xmat("site1").shape, (3, 3)) assert_array_equal(sim.data.get_geom_xpos("geom1"), [0.5, 0.4, 0.3]) assert_array_equal(sim.data.get_geom_xpos("geom2"), [0, 1, 1]) assert_array_equal(sim.data.get_geom_xmat("geom2").shape, (3, 3)) assert_array_equal(sim.data.get_light_xpos("light1"), [0, 0, 3]) assert_array_equal(sim.data.get_light_xdir("light1"), [0, 0, -1]) assert_array_equal(sim.data.get_camera_xpos("camera1"), [3, 0, 0]) assert_array_equal(sim.data.get_camera_xmat("camera1").shape, (3, 3)) assert_array_equal(sim.data.get_joint_xaxis("joint1"), [0, 0, 1]) assert_array_equal(sim.data.get_joint_xanchor("joint1"), [0, 0, 1])
def test_concurrent_rendering(): '''Best-effort testing that concurrent multi-threaded rendering works. The test has no guarantees around being deterministic, but if it fails you know something is wrong with concurrent rendering. If it passes, things are probably working.''' err = None def func(sim, event): event.wait() sim.data.qpos[:] = 0.0 sim.forward() img1 = sim.render(width=40, height=40, camera_name="camera1") img2 = sim.render(width=40, height=40, camera_name="camera2") try: assert np.sum(img1[:]) == 23255 assert np.sum(img2[:]) == 12007 except Exception as e: nonlocal err err = e model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.render(100, 100) event = Event() threads = [] for _ in range(100): thread = Thread(target=func, args=(sim, event)) threads.append(thread) thread.start() event.set() for thread in threads: thread.join() assert err is None, "Exception: %s" % (str(err))
def test_high_res(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img = sim.render(1000, 1000) img = np.array(Image.fromarray(img).resize(size=(200, 200))) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png')
def test_high_res(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img = sim.render(1000, 1000) img = scipy.misc.imresize(img, (200, 200, 3)) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png')
def test_ignore_mujoco_warnings(): # Two boxes on a plane need more than 1 contact (nconmax) xml = ''' <mujoco> <size nconmax="1"/> <worldbody> <geom type="plane" size="1 1 0.1"/> <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body> <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body> </worldbody> </mujoco> ''' model = load_model_from_xml(xml) sim = MjSim(model) sim.reset() with ignore_mujoco_warnings(): # This should raise an exception due to the mujoco warning callback, # but it's suppressed by the context manager. sim.step() sim.reset() with pytest.raises(Exception): # test to make sure previous warning callback restored. sim.step()
def test_viewercontext(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() renderer = cymj.MjRenderContext(sim, offscreen=True) renderer.add_marker(type=const.GEOM_SPHERE, size=np.ones(3) * 0.1, pos=np.zeros(3), mat=np.eye(3).flatten(), rgba=np.ones(4), label="mark")
def test_mj_sim_basics(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model, nsubsteps=2) sim.reset() sim.step() sim.reset() sim.forward()
def test_read_depth_buffer(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() ctx = MjRenderContext(sim, offscreen=True, opengl_backend='glfw') buf = np.zeros((11, 100), dtype=np.float32) assert buf.sum() == 0, f'{buf.sum()}' ctx.render(buf.shape[1], buf.shape[0], 0) ctx.read_pixels_depth(buf) assert buf.sum() != 0, f'{buf.sum()} {buf.max()}'
def test_glfw_context(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() render_context = MjRenderContext(sim, offscreen=True, opengl_backend='glfw') assert len(sim.render_contexts) == 1 assert sim.render_contexts[0] is render_context assert isinstance(render_context.opengl_context, GlfwContext) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_glfw_context.png') assert len(sim.render_contexts) == 1 assert sim.render_contexts[0] is render_context
def test_joint_qpos_qvel_ops(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() # Test setting one with a list sim.data.set_joint_qpos("joint1", [1, 2, 3, 1, 0, 0, 0]) # And the other with an np.ndarray sim.data.set_joint_qvel("joint1", np.array([1, 2, 3, 0.1, 0.1, 0.1])) sim.forward() assert_array_equal(sim.data.get_joint_qpos( "joint1"), [1, 2, 3, 1, 0, 0, 0]) assert_array_equal(sim.data.get_joint_qvel( "joint1"), [1, 2, 3, 0.1, 0.1, 0.1])
def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed()
def test_textures(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.premod.png') random_state = np.random.RandomState(0) modder = TextureModder(sim, random_state=random_state) modder.whiten_materials() modder.whiten_materials(['g1', 'g2']) modder.set_rgb('g1', (255, 0, 0)) modder.set_rgb('g2', (0, 255, 0)) modder.set_rgb('g3', (0, 0, 255)) modder.set_rgb('g4', (255, 0, 255)) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rgb.png') modder.set_checker('g1', (255, 0, 0), (0, 255, 0)) modder.set_gradient('g2', (0, 255, 0), (0, 0, 255), vertical=True) modder.set_gradient('g3', (255, 255, 0), (0, 0, 255), vertical=False) modder.set_noise('g4', (0, 0, 255), (255, 0, 0), 0.1) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.variety.png') modder.rand_checker('g1') modder.rand_gradient('g2') modder.rand_noise('g3') modder.rand_rgb('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_specific.png') modder.rand_all('g1') modder.rand_all('g2') modder.rand_all('g3') modder.rand_all('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_all.png') modder.rand_checker('g1') modder.rand_checker('g2') modder.rand_checker('g3') modder.rand_checker('g4') mat_modder = MaterialModder(sim, random_state=random_state) mat_modder.rand_texrepeat('g1') mat_modder.rand_texrepeat('g2') mat_modder.rand_texrepeat('g3') mat_modder.rand_texrepeat('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_texrepeat.png')
def test_rendering(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img, depth = sim.render(200, 200, depth=True) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png') depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth)) depth = np.asarray(depth * 255, dtype=np.uint8) assert depth.shape == (200, 200) compare_imgs(depth, 'test_rendering.freecam.depth.png') img = sim.render(100, 100, camera_name="camera1") assert img.shape == (100, 100, 3) compare_imgs(img, 'test_rendering.camera1.png') img = sim.render(200, 100, camera_name="camera1") assert img.shape == (100, 200, 3) compare_imgs(img, 'test_rendering.camera1.narrow.png') render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="mark") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering_markers.camera1.png')
def test_rendering(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img, depth = sim.render(200, 200, depth=True) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png') depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth)) depth = np.asarray(depth * 255, dtype=np.uint8) assert depth.shape == (200, 200) # Unfortunately mujoco 2.0 renders slightly different depth image on mac and on linux here if "darwin" in sys.platform.lower(): compare_imgs(depth, 'test_rendering.freecam.depth-darwin.png') else: compare_imgs(depth, 'test_rendering.freecam.depth.png') img = sim.render(100, 100, camera_name="camera1") assert img.shape == (100, 100, 3) compare_imgs(img, 'test_rendering.camera1.png') img = sim.render(200, 100, camera_name="camera1") assert img.shape == (100, 200, 3) compare_imgs(img, 'test_rendering.camera1.narrow.png') render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="mark") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering_markers.camera1.png')
def test_ray(self): ''' Test raycasting and exclusions ''' sim = MjSim(load_model_from_xml(self.xml)) sim.forward() # Include all geoms self.check_rays(sim, [0.9, 0.1, 0.9, 0.1, 0.9, 0.1, -1.0], ['A', 'A', 'B', 'B', 'C', 'C', None]) # Include static geoms, but exclude worldbody (which contains 'A') self.check_rays(sim, [2.9, 1.9, 0.9, 0.1, 0.9, 0.1, -1.0], ['B', 'B', 'B', 'B', 'C', 'C', None], exclude_body=0) # Include static geoms, and exclude body 1 (which contains 'C') self.check_rays(sim, [0.9, 0.1, 0.9, 0.1, -1.0, -1.0, -1.0], ['A', 'A', 'B', 'B', None, None, None], exclude_body=1) # Include static geoms, and exclude body 2 (which contains 'B') self.check_rays(sim, [0.9, 0.1, 2.9, 1.9, 0.9, 0.1, -1.0], ['A', 'A', 'C', 'C', 'C', 'C', None], exclude_body=2) # Exclude static geoms ('A' is the only static geom) self.check_rays(sim, [2.9, 1.9, 0.9, 0.1, 0.9, 0.1, -1.0], ['B', 'B', 'B', 'B', 'C', 'C', None], include_static_geoms=False) # Exclude static geoms, and exclude body 1 ('C') self.check_rays(sim, [2.9, 1.9, 0.9, 0.1, -1.0, -1.0, -1.0], ['B', 'B', 'B', 'B', None, None, None], include_static_geoms=False, exclude_body=1) # Exclude static geoms, and exclude body 2 (which contains 'B') self.check_rays(sim, [4.9, 3.9, 2.9, 1.9, 0.9, 0.1, -1.0], ['C', 'C', 'C', 'C', 'C', 'C', None], include_static_geoms=False, exclude_body=2)
def test_mocap_ops(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() assert_array_equal(sim.data.get_body_xpos("mocap1"), [1, 0, 0]) assert_array_equal(sim.data.get_mocap_pos("mocap1"), [1, 0, 0]) assert_array_equal(sim.data.get_mocap_quat("mocap1"), [1, 0, 0, 0]) new_pos = [2, 1, 1] new_quat = [0.707107, 0.707107, 0, 0] sim.data.set_mocap_pos("mocap1", new_pos) sim.data.set_mocap_quat("mocap1", new_quat) sim.forward() assert_array_equal(sim.data.get_mocap_pos("mocap1"), new_pos) assert_array_almost_equal(sim.data.get_mocap_quat("mocap1"), new_quat) assert_array_equal(sim.data.get_body_xpos("mocap1"), new_pos) assert_array_almost_equal(sim.data.get_body_xquat("mocap1"), new_quat) assert_array_almost_equal(sim.data.get_body_xmat("mocap1"), [[1, 0, 0], [0, 0, -1], [0, 1, 0]])
def test_sim_save(): model = load_model_from_xml(BASIC_MODEL_XML) assert model.nkey == 0 sim = MjSim(model) with StringIO() as f: sim.save(f) f.seek(0) loaded_model = load_model_from_xml(f.read()) assert loaded_model.nkey == 1 with BytesIO() as f: sim.save(f, format='mjb') f.seek(0) loaded_model = load_model_from_mjb(f.read()) assert loaded_model.nkey == 1
def test_mj_warning_raises(): ''' Test that MuJoCo warnings cause exceptions. ''' # Two boxes on a plane need more than 1 contact (nconmax) xml = ''' <mujoco> <size nconmax="1"/> <worldbody> <geom type="plane" size="1 1 0.1"/> <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body> <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body> </worldbody> </mujoco> ''' model = load_model_from_xml(xml) sim = MjSim(model) sim.reset() with pytest.raises(Exception): # This should raise an exception due to the mujoco warning callback sim.step()
def test_xvelr(): # xvelr = rotational velocity in world frame xml = """ <mujoco> <worldbody> <body name="body1" pos="0 0 0"> <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/> <geom name="geom1" pos="0 0 0" size="0.3"/> <body name="body2" pos="0 0 1"> <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/> <geom name="geom2" pos="0 0 0" size="0.3"/> <site name="site1" size="0.1"/> </body> </body> </worldbody> <actuator> <motor joint="a"/> <motor joint="b"/> </actuator> </mujoco> """ model = load_model_from_xml(xml) sim = MjSim(model) sim.reset() sim.forward() # Check that xvelr starts out at zero (since qvel is zero) site1_xvelr = sim.data.get_site_xvelr('site1') np.testing.assert_allclose(site1_xvelr, np.zeros(3)) # Push the base body and step forward to get it moving sim.data.ctrl[0] = 1e9 sim.step() sim.forward() # Check that the first body has nonzero xvelr body1_xvelr = sim.data.get_body_xvelr('body1') assert not np.allclose(body1_xvelr, np.zeros(3)) # Check that the second body has zero xvelr (still) body2_xvelr = sim.data.get_body_xvelr('body2') np.testing.assert_allclose(body2_xvelr, np.zeros(3)) # Check that this matches the batch (gathered) getter property np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
def test_rendering_failing(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() sim.render(100, 100) render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="blaaaa") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) try: compare_imgs(img, 'test_rendering_markers.camera1.png') assert False except Exception as e: pass
def test_materials(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.premod.png') random_state = np.random.RandomState(0) modder = MaterialModder(sim, random_state=random_state) modder.set_specularity('g1', 1.0) modder.set_reflectance('g2', 1.0) modder.set_shininess('g3', 1.0) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.props.png') modder.rand_all('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.rand_all.png')
class Aether: """ Deus do espaço e do paraíso Faz a conexão entre o simulador e os módulos do programa """ def __init__(self): # DEFINIÇÕES self.field_width = 640 self.field_height = 480 self.cascadeTime = 0 self.cascadeLoops = 1 self.cascadeLastTime = 0 # PREPARAÇÃO model = load_model_from_path("simulator/scene.xml") self.sim = MjSim(model) self.viewer = Viewer(self.sim, self) self.ball_joint = self.sim.model.get_joint_qpos_addr("Ball")[0] self.robot_joints = [ self.sim.model.get_joint_qpos_addr("Robot_01")[0], self.sim.model.get_joint_qpos_addr("Robot_02")[0], self.sim.model.get_joint_qpos_addr("Robot_03")[0], self.sim.model.get_joint_qpos_addr("Robot_04")[0], self.sim.model.get_joint_qpos_addr("Robot_05")[0], self.sim.model.get_joint_qpos_addr("Robot_06")[0] ] # EXECUÇÃO # prepara os módulos self.enabled = [False] * 6 # [False, False, False, True, True, True] self.athena = [Athena(), Athena()] self.zeus = [Zeus(), Zeus()] Endless.setup(self.field_width, self.field_height) self.athena[0].setup(3, 0.8) self.athena[1].setup(3, 0.8) self.zeus[0].setup(3) self.zeus[1].setup(3) # inicializa o loop dos dados self.pause = False self.loopThread1 = threading.Thread(target=self.loopTeam, args=[0]) self.loopThread2 = threading.Thread(target=self.loopTeam, args=[1]) self.loopThread1.daemon = True self.loopThread2.daemon = True self.loopThread1.start() self.loopThread2.start() self.showInfos(0) self.showInfos(1) def run(self): while True: self.sim.step() self.viewer.render() def loopTeam(self, team): while True: time.sleep(0.0000000001) if self.pause or \ (not self.enabled[3 * team] and not self.enabled[3 * team + 1] and not self.enabled[3 * team + 2]): continue # executa nossos módulos positions = self.generatePositions(team) commands = self.athena[team].getTargets(positions) velocities = self.zeus[team].getVelocities(commands) # aplica resultados na simulação if self.enabled[0 + 3 * team]: self.sim.data.ctrl[0 + 6 * team] = self.convertVelocity( velocities[0]["vLeft"]) self.sim.data.ctrl[1 + 6 * team] = self.convertVelocity( velocities[0]["vRight"]) if self.enabled[1 + 3 * team]: self.sim.data.ctrl[2 + 6 * team] = self.convertVelocity( velocities[1]["vLeft"]) self.sim.data.ctrl[3 + 6 * team] = self.convertVelocity( velocities[1]["vRight"]) if self.enabled[2 + 3 * team]: self.sim.data.ctrl[4 + 6 * team] = self.convertVelocity( velocities[2]["vLeft"]) self.sim.data.ctrl[5 + 6 * team] = self.convertVelocity( velocities[2]["vRight"]) # mostra resultados self.showInfos(team, positions, commands) # indicadores 3D for i in range(3): position = positions[0][i]["position"] self.setObjectPose("indicator_" + str(i + 3 * team + 1), position, team, 0.2, velocities[i]["vector"]) # self.setObjectPose("virtual_robot_1", velocities["virtualPos"], team=0) # HELPERS def showInfos(self, team, positions=None, commands=None): infos = [] for i in range(3): if self.enabled[i + 3 * team] and positions and commands: # informações que todos os robôs tem robot = "X: " + "{:.1f}".format(positions[0][i]["position"][0]) robot += ", Y: " + "{:.1f}".format( positions[0][i]["position"][1]) robot += ", O: " + "{:.1f}".format( positions[0][i]["orientation"]) robot += ", T: " + commands[i]["tactics"] robot += ", C: " + commands[i]["command"] if commands[i]["command"] == "lookAt": if type(commands[i]["data"]["target"]) is tuple: robot += "(" + "{:.1f}".format( commands[i]["data"]["target"][0]) + ", " robot += "{:.1f}".format( commands[i]["data"]["target"][1]) + ")" else: robot += "(" + "{:.1f}".format( commands[i]["data"]["target"]) + ")" elif commands[i]["command"] == "goTo": robot += "(" + "{:.1f}".format( commands[i]["data"]["target"]["position"][0]) + ", " robot += "{:.1f}".format( commands[i]["data"]["target"]["position"][1]) + ", " if type(commands[i]["data"]["target"] ["orientation"]) is tuple: robot += "(" + "{:.1f}".format( commands[i]["data"]["target"]["orientation"] [0]) + ", " robot += "{:.1f}".format(commands[i]["data"]["target"] ["orientation"][1]) + ") )" else: robot += "{:.1f}".format( commands[i]["data"]["target"]["orientation"]) + ")" elif commands[i]["command"] == "spin": robot += "(" + commands[i]["data"]["direction"] + ")" else: robot = "[OFF]" infos.append(robot) self.viewer.infos["robots" + str(team + 1)] = infos if team == 0: if positions: self.viewer.infos["ball"] = "X: " + "{:.2f}".format(positions[2]["position"][0]) + ", Y: " + \ "{:.2f}".format(positions[2]["position"][1]) fps = self.getFPS() if fps: self.viewer.infos["fps"] = fps if commands: # indicadores 3D # print(commands[0]["futureBall"]) self.setObjectPose("virtual_ball", commands[0]["futureBall"], 0, 0.022) for i in range(3): if commands[i]["command"] == "goTo": target = commands[i]["data"]["target"]["position"] targetOrientation = commands[i]["data"]["target"][ "orientation"] if type(targetOrientation) is tuple: position = positions[0][i]["position"] targetOrientation = math.atan2( position[1] - targetOrientation[1], -(position[0] - targetOrientation[0])) self.setObjectPose("target_" + str(i + 1), target, 0, 0.01, targetOrientation) def getFPS(self): # calcula o fps e manda pra interface self.cascadeTime += time.time() - self.cascadeLastTime self.cascadeLoops += 1 self.cascadeLastTime = time.time() if self.cascadeTime > 1: fps = self.cascadeLoops / self.cascadeTime self.cascadeTime = self.cascadeLoops = 0 return "{:.2f}".format(fps) return None # FUNÇÕES def reset(self): for i in range(6): self.enabled[i] = False self.showInfos(0) self.showInfos(1) self.sim.reset() def startStop(self, pause): self.pause = pause def moveBall(self, direction, keepVel=False): if not keepVel: for i in range(6): self.sim.data.qvel[self.ball_joint + i] = 0 if direction == 0: # UP self.sim.data.qpos[self.ball_joint + 1] += 0.01 elif direction == 1: # DOWN self.sim.data.qpos[self.ball_joint + 1] -= 0.01 elif direction == 2: # LEFT self.sim.data.qpos[self.ball_joint] -= 0.01 elif direction == 3: # RIGHT self.sim.data.qpos[self.ball_joint] += 0.01 def toggleRobot(self, robotId, moveOut=False): if self.sim.data.qpos[self.robot_joints[robotId] + 1] >= 1: self.enabled[robotId] = False if moveOut: self.sim.data.qpos[self.robot_joints[robotId] + 1] = 0 elif moveOut: self.enabled[robotId] = False self.sim.data.qpos[ self.robot_joints[robotId]] = -0.62 + 0.25 * robotId self.sim.data.qpos[self.robot_joints[robotId] + 1] = 1.5 self.sim.data.qpos[self.robot_joints[robotId] + 2] = 0.04 self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0 elif self.enabled[robotId]: self.enabled[robotId] = False self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0 else: self.enabled[robotId] = True self.showInfos(0 if robotId < 3 else 1) def convertPositionX(self, coord, team): """Traz o valor pra positivo e multiplica pela proporção (largura máxima)/(posição x máxima) Args: coord: Coordenada da posição no mundo da simulação a ser convertida team: Time que está pedindo a conversão (0 ou 1) Returns: Coordenada da posição na proporção utilizada pela estratégia """ if team == 0: return (coord + 0.8083874182591296) * self.field_width / 1.6167748365182593 else: return -(coord - 0.8083874182591296 ) * self.field_width / 1.6167748365182593 def convertPositionY(self, coord, team): """Traz o valor pra positivo e multiplica pela proporção (altura máxima)/(posição y máxima) Args: coord: Coordenada da posição no mundo da simulação a ser convertida team: Time que está pedindo a conversão (0 ou 1) Returns: Coordenada da posição na proporção utilizada pela estratégia """ if team == 0: return (coord + 0.58339083) * self.field_height / 1.16678166 else: return -(coord - 0.58339083) * self.field_height / 1.16678166 @staticmethod def convertVelocity(vel): return vel * 30 def setObjectPose(self, objectName, newPos, team=0, height=0.04, newOrientation=0): """Seta a posição e orientação de um objeto no simulador Args: objectName: Nome do objeto a ter a pose alterada. Esse nome deve ser de um mocap configurado na cena. Se o objeto for virtual_robot_i, o robô é amarelo se i <= 3, azul caso contrário newPos: (x, y), 'x' e 'y' valores em pixels team: índice do time (valor em pixels inverte de acordo com o time) height: altura do objeto no universo newOrientation: orientação Z em radianos do objeto """ if team == 0: x = (newPos[0] / self.field_width) * 1.6167748365182593 - 0.8083874182591296 y = (newPos[1] / self.field_height) * 1.16678166 - 0.58339083 else: x = -(newPos[0] / self.field_width) * 1.6167748365182593 + 0.8083874182591296 y = -(newPos[1] / self.field_height) * 1.16678166 + 0.58339083 # conversão de eulerAngles para quaternions (wikipedia) newQuat = [ math.sin(newOrientation / 2), 0, 0, math.cos(newOrientation / 2) ] self.sim.data.set_mocap_quat(objectName, newQuat) self.sim.data.set_mocap_pos(objectName, np.array([x, y, height])) def generatePositions(self, team): """Cria o vetor de posições no formato esperado pela estratégia O 'sim.data.qpos' possui, em cada posição, o seguinte: 0: pos X 1: pos Y 2: pos Z 3: quat component w 4: quat component x 5: quat component y 6: quat component z Returns: Vetor de posições no formato correto """ r1 = math.pi * team - math.atan2( 2 * (self.sim.data.qpos[self.robot_joints[3 * team] + 3] * self.sim.data.qpos[self.robot_joints[3 * team] + 6] + self.sim.data.qpos[self.robot_joints[3 * team] + 4] * self.sim.data.qpos[self.robot_joints[3 * team] + 6]), 1 - 2 * (self.sim.data.qpos[self.robot_joints[3 * team] + 5] * self.sim.data.qpos[self.robot_joints[3 * team] + 5] + self.sim.data.qpos[self.robot_joints[3 * team] + 6] * self.sim.data.qpos[self.robot_joints[3 * team] + 6])) r2 = math.pi * team - math.atan2( 2 * (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 3] * self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] + self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 4] * self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6]), 1 - 2 * (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] * self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] + self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] * self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6])) r3 = math.pi * team - math.atan2( 2 * (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 3] * self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] + self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 4] * self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6]), 1 - 2 * (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] * self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] + self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] * self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6])) return [ [ # robôs aliados { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[3 * team]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[3 * team] + 1], team)), "orientation": r1, "robotLetter": "A" }, { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[1 + 3 * team]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 1], team)), "orientation": r2, "robotLetter": "B" }, { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[2 + 3 * team]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 1], team)), "orientation": r3, "robotLetter": "C" } ], [ # robôs adversários { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[(3 + 3 * team) % 6]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[(3 + 3 * team) % 6] + 1], team)), }, { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[(4 + 3 * team) % 6]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[ (4 + 3 * team) % 6] + 1], team)), }, { "position": (self.convertPositionX( self.sim.data.qpos[self.robot_joints[(5 + 3 * team) % 6]], team), self.convertPositionY( self.sim.data.qpos[self.robot_joints[ (5 + 3 * team) % 6] + 1], team)), } ], { # bola "position": (self.convertPositionX(self.sim.data.qpos[self.ball_joint], team), self.convertPositionY(self.sim.data.qpos[self.ball_joint + 1], team)) } ]
import mujoco_py import numpy as np from gym import spaces from mujoco_py import load_model_from_path, MjSim, MjViewer from HCPI.util import seeding if __name__ == '__main__': desp = 'Display Robot' parser = argparse.ArgumentParser(description=desp) parser.add_argument('--robot_file', type=str, default='../xml/simrobot/6dof/pusher.xml') args = parser.parse_args() np.set_printoptions(precision=6, suppress=True) print('Displaying robot from:', os.path.abspath(args.robot_file)) model = load_model_from_path(args.robot_file) sim = MjSim(model, nsubsteps=20) sim.reset() sim.step() # sim.render(64,64) # observation viewer = MjViewer(sim) while True: ctrl = (np.random.random(8) - 0.5) * 2 ctrl = np.multiply(ctrl, np.array([40, 2, 2, 9, 9, 0, 0, 0])) ctrl[0] = -40 sim.data.ctrl[:] = ctrl sim.step() viewer.render()
def test_sim_state(): model = load_model_from_xml(BASIC_MODEL_XML) foo = 10 d = {"foo": foo, "foo_array": np.array([foo, foo, foo]), "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)), } def udd_callback(sim): return d sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback) state = sim.get_state() assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) for k in state.udd_state.keys(): if (isinstance(state.udd_state[k], Number)): assert state.udd_state[k] == sim.udd_state[k] else: assert np.array_equal(state.udd_state[k], sim.udd_state[k]) # test flatten, unflatten a = state.flatten() assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8) state2 = MjSimState.from_flattened(a, sim) assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) for k in state2.udd_state.keys(): if (isinstance(state2.udd_state[k], Number)): assert state2.udd_state[k] == sim.udd_state[k] else: assert np.array_equal(state2.udd_state[k], sim.udd_state[k]) assert state2 == state assert not state2 != state # test equality with deleting keys state2 = state2._replace(udd_state={"foo": foo}) assert state2 != state assert not (state2 == state) # test equality with changing contents of array state2 = state2._replace( udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])}) assert state2 != state assert not (state2 == state) # test equality with adding keys d2 = dict(d) d2.update({"not_foo": foo}) state2 = state2._replace(udd_state=d2) assert state2 != state assert not (state2 == state) # test defensive copy sim.set_state(state) state.qpos[0] = -1 assert not np.array_equal(state.qpos, sim.data.qpos) state3 = sim.get_state() state3.qpos[0] = -1 assert not np.array_equal(state3.qpos, sim.data.qpos) state3.udd_state["foo_array"][0] = -1 assert not np.array_equal( state3.udd_state["foo_array"], sim.udd_state["foo_array"]) # test no callback sim = MjSim(model, nsubsteps=2) state = sim.get_state() print("state.udd_state = %s" % state.udd_state) assert state.udd_state == {} # test flatten, unflatten a = state.flatten() assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na state2 = MjSimState.from_flattened(a, sim) assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) assert state.udd_state == sim.udd_state
class Square2dEnv(Env): # TODO make this into GoalEnv def __init__(self, model_path='./square2d.xml', distance_threshold=1e-1, frame_skip=2, goal=[0.3, 0.3], horizon=100): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.model = load_model_from_path(fullpath) self.seed() self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self.distance_threshold = distance_threshold self.frame_skip = frame_skip self.set_goal_location(goal) self.reward_type = 'dense' self.horizon = horizon self.time_step = 0 obs = self.get_current_observation() self.action_space = spaces.Box(-1., 1., shape=(2, )) self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape) def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): self.set_ball_location([0., 0.]) self.sim.forward() self.time_step = 0 return self.get_current_observation() def _get_viewer(self): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer_setup() return self.viewer def viewer_setup(self): self.viewer.cam.lookat[ 0] = 0.0 # x,y,z offset from the object (works if trackbodyid=-1) self.viewer.cam.lookat[1] = 0.0 self.viewer.cam.lookat[2] = 0.0 self.viewer.cam.elevation = -90 # camera rotation around the axis in the plane going through the frame origin (if 0 you just see a line) self.viewer.cam.azimuth = 90 self.viewer.cam.distance = 1.5 def set_goal_location(self, goalPos): # goal = [xLoc, yLoc] assert np.linalg.norm(np.asarray(goalPos) - np.asarray([0.3, 0.3]), axis=-1) < 0.1 self.sim.data.qpos[0] = goalPos[0] self.sim.data.qpos[1] = goalPos[1] def set_ball_location(self, ballPos): self.sim.data.qpos[2] = ballPos[0] self.sim.data.qpos[3] = ballPos[1] def set_distance_threshold(self, distance_threshold): self.distance_threshold = distance_threshold def set_frame_skip(self, frame_skip): self.frame_skip = frame_skip def get_frame_skip(self): return self.frame_skip def get_distance_threshold(self): return self.distance_threshold def get_ball_location(self): return self.sim.data.qpos[2:4] def get_goal_location(self): return self.sim.data.qpos[0:2] def get_ball_velocity(self): return self.sim.data.qvel[2:4] def send_control_command(self, xDirectionControl, yDirectionControl): self.sim.data.ctrl[0] = xDirectionControl self.sim.data.ctrl[1] = yDirectionControl def get_current_observation(self): obs = np.concatenate([ self.get_goal_location(), self.get_ball_location(), self.get_ball_velocity() ]).ravel() return obs.copy() # obs = np.concatenate([self.get_ball_location(), self.get_ball_velocity()]).ravel() # desired_goal = self.get_goal_location() # achieved_goal = self.get_ball_location() # return { # 'observation': obs.copy(), # 'achieved_goal': achieved_goal.copy(), # 'desired_goal': desired_goal.copy() # # } def get_image_of_goal_observation(self, xLoc=None, yLoc=None): if not xLoc: xLoc = self.sim.data.qpos[0] if not yLoc: yLoc = self.sim.data.qpos[1] self.sim.data.qpos[0] = xLoc self.sim.data.qpos[1] = yLoc self.sim.data.qpos[2] = xLoc self.sim.data.qpos[3] = yLoc self.render() def do_simulation(self, ctrl, n_frames): self.send_control_command(ctrl[0], ctrl[1]) for _ in range(n_frames): self.take_step() def step(self, ctrl): if np.linalg.norm(self.get_goal_location() - [0.3, 0.3], axis=-1) > 0.1: print(self.get_goal_location()) # assert False ctrl = np.clip(ctrl, -1., 1.) self.do_simulation(ctrl, self.frame_skip) obs = self.get_current_observation() info = {} reward = self.compute_reward(self.get_ball_location(), self.get_goal_location(), {}) done = (reward == 1.0) self.time_step += 1 if self.time_step >= self.horizon: done = True return obs, reward, done, info def take_step(self): self.sim.step() def render(self, mode='human'): self._get_viewer().render() def compute_reward(self, achieved_goal, desired_goal, info): # Compute distance between goal and the achieved goal. d = np.linalg.norm(achieved_goal - desired_goal, axis=-1) if self.reward_type == 'sparse': return -(d > self.distance_threshold).astype(np.float32) else: return -d def _is_success(self, achieved_goal, desired_goal): d = np.linalg.norm(achieved_goal - desired_goal, axis=-1) return (d < self.distance_threshold).astype(np.float32) def log_diagnostics(self, paths): pass def terminate(self): pass
def test_tableBoundViolation(): model = load_model_from_path("robot.xml") sim = MjSim(model) test1 = np.array([-0.0031, -0.9718, -0.7269, -2.4357, -0.0157, 1.5763, 0.7303]) #False test2 = np.array([0.0264, -0.0772, 0.1924, -2.8478, -0.0273, 2.8339, 0.7566]) #True test3 = np.array([-1.4870, -1.7289, 1.6138, -1.9814, -0.9856, 1.9304, 0.9981]) #True test4 = np.array([-0.5250, -0.6410, 0.1893, -1.3827, -0.2573, 2.1356, 0.7116]) #False test5 = np.array([-0.0133, 0.9846, 0.0365, -1.5491, 2.8629, 0.7630, 0.6254]) #True qd = np.zeros(7) state = MjSimState(time=0,qpos=test1,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test1:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test2,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test2:", "Passed" if bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test3,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test3:", "Passed" if bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test4,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test4:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test5,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test5:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
def main(args): if not os.path.isdir(args.result_dir): os.makedirs(args.result_dir) parent = os.path.dirname(os.path.abspath(__file__)) # load test xml files test_file = os.path.join( parent, 'tests/test_xmls/temp_1_{}.pickle'.format(args.case)) params = pickle.load(open(test_file, 'rb')) # params = params[:6] if args.shuffle: random.shuffle(params) num_test = len(params) print(' ++++++++++++++++++++++++++') print(' +++ Now running case {} +++'.format(args.case)) print(' ++++++++++++++++++++++++++\n\n') # Create our policy net and a target net policy_net = DRQN(args.ftdim, args.outdim).to(args.device) if args.position: tactile_net = TactileNet(args.indim - 6, args.ftdim).to(args.device) elif args.force: tactile_net = TactileNet(args.indim - 390, args.ftdim).to(args.device) else: tactile_net = TactileNet(args.indim, args.ftdim).to(args.device) # Setup the state normalizer normalizer = Multimodal_Normalizer(num_inputs=args.indim, device=args.device) if args.weight_policy: checkpoint = torch.load(args.weight_policy) policy_net.load_state_dict(checkpoint['policy_net_1']) if args.weight_tactile: checkpoint = torch.load(args.weight_tactile) tactile_net.load_state_dict(checkpoint['tactile_net_1']) if args.normalizer_file: if os.path.exists(args.normalizer_file): normalizer.restore_state(args.normalizer_file) # Create robot, reset simulation and grasp handle model = load_model_from_path(args.model_path) sim = MjSim(model) sim_param = SimParameter(sim) sim.step() if args.render: viewer = MjViewer(sim) else: viewer = None robot = RobotSim(sim, viewer, sim_param, args.render, args.break_thresh) tactile_obs_space = TactileObs( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30 x 6 performance = { 'time': [], 'success': [], 'num_broken': [], 'tendon_hist': [0, 0, 0, 0, 0], 'collision': [] } for i in range(num_test): velcro_params = params[i] geom, origin_offset, euler, radius = velcro_params print('\n\nTest {} Velcro parameters are: {}, {}, {}, {}'.format( i, geom, origin_offset, euler, radius)) change_sim(robot.mj_sim, geom, origin_offset, euler, radius) robot.reset_simulation() ret = robot.grasp_handle() performance = test_network(args, policy_net, tactile_net, normalizer, robot, tactile_obs_space, performance) print('Success: {}, time: {}, num_broken: {}, collision:{} '.format( performance['success'][-1], performance['time'][-1], performance['num_broken'][-1], performance['collision'][-1])) print('Finished opening velcro with haptics test \n') success = np.array(performance['success']) time = np.array(performance['time']) print('Successfully opened the velcro in: {}% of cases'.format( 100 * np.sum(success) / len(performance['success']))) print('Average time to open: {}'.format(np.average(time[success > 0]))) # print('Action histogram for the test is: {}'.format(performance['action_hist'])) # collision = np.array(performance['collision']) # threshold = 3000 # high_success = float(np.sum(success[collision<threshold])) / float(np.sum(np.ones(num_test)[collision<threshold])) # low_success = float(np.sum(success[collision>threshold])) / float(np.sum(np.ones(num_test)[collision>threshold])) # print('high_success: {} low_success: {} '.format(high_success, low_success)) ablation = 'None' if args.position: ablation = 'position' if args.force: ablation = 'force' checkpoint = args.weight_policy.split('/')[-1] out_fname = 'case{}_{}_{}.txt'.format(args.case, ablation, checkpoint) with open(os.path.join(args.result_dir, out_fname), 'w+') as f: f.write('Time: {}\n'.format(performance['time'])) f.write('Success: {}\n'.format(performance['success'])) f.write('Successfully opened the velcro in: {}% of cases\n'.format( 100 * np.sum(success) / len(performance['success']))) f.write('Average time to open: {}\n'.format( np.average(time[success > 0]))) f.write('Num_broken: {}\n'.format(performance['num_broken'])) f.write('Tendon histogram: {}\n'.format(performance['tendon_hist'])) f.write('collision: {}\n'.format(performance['collision'])) # f.write('high_success: {} low_success: {} '.format(high_success, low_success)) f.close()
class MujocoEnv(metaclass=EnvMeta): """Initializes a Mujoco Environment.""" parameters_spec = {} def __init__( self, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, use_camera_obs=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, ): """ Args: has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every simulated second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). use_camera_obs (bool): if True, every observation includes a rendered image. camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ self.has_renderer = has_renderer self.has_offscreen_renderer = has_offscreen_renderer self.render_collision_mesh = render_collision_mesh self.render_visual_mesh = render_visual_mesh self.control_freq = control_freq self.horizon = horizon self.ignore_done = ignore_done self.viewer = None self.model = None # settings for camera observations self.use_camera_obs = use_camera_obs if self.use_camera_obs and not self.has_offscreen_renderer: raise ValueError( "Camera observations require an offscreen renderer.") self.camera_name = camera_name if self.use_camera_obs and self.camera_name is None: raise ValueError("Must specify camera name when using camera obs") self.camera_height = camera_height self.camera_width = camera_width self.camera_depth = camera_depth self._reset_internal() def initialize_time(self, control_freq): """ Initializes the time constants used for simulation. """ self.cur_time = 0 self.model_timestep = self.sim.model.opt.timestep if self.model_timestep <= 0: raise XMLError("xml model defined non-positive time step") self.control_freq = control_freq if control_freq <= 0: raise SimulationError( "control frequency {} is invalid".format(control_freq)) self.control_timestep = 1. / control_freq def _load_model(self): """Loads an xml model, puts it in self.model""" pass def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ pass def reset(self, **kwargs): """Resets simulation.""" # TODO(yukez): investigate black screen of death # if there is an active viewer window, destroy it self._destroy_viewer() self.reset_props(**kwargs) self._reset_internal() self.sim.forward() return self._get_observation() def reset_props(self): print( 'INFO from GZZ: this is the base class reset_props. This means the environment does not support domain randomization' ) def init_viewer(self): print('init_viewer', self.viewer) if self.has_offscreen_renderer: print( 'WARNING from GZZ: robosuite has a bug on simutaneously rendering for both offscreen (like camera obs) and onscreen (X window), and will give (at least) onscreen `black screen of death` after a reset. Please check their issue: https://github.com/StanfordVL/robosuite/issues/25 . I failed to help them debug on this.' ) self.viewer = MujocoPyRenderer(self.sim) self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0) self.viewer.viewer.vopt.geomgroup[ 1] = 1 if self.render_visual_mesh else 0 # hiding the overlay speeds up rendering significantly self.viewer.viewer._hide_overlay = True def _reset_internal(self): """Resets simulation internal configurations.""" # instantiate simulation from MJCF model self._load_model() self.mjpy_model = self.model.get_model(mode="mujoco_py") self.sim = MjSim(self.mjpy_model) self.initialize_time(self.control_freq) # create visualization screen or renderer if self.has_renderer and self.viewer is None: self.init_viewer() elif self.has_offscreen_renderer: if self.sim._render_context_offscreen is None: render_context = MjRenderContextOffscreen(self.sim) self.sim.add_render_context(render_context) self.sim._render_context_offscreen.vopt.geomgroup[0] = ( 1 if self.render_collision_mesh else 0) self.sim._render_context_offscreen.vopt.geomgroup[1] = ( 1 if self.render_visual_mesh else 0) # additional housekeeping self.sim_state_initial = self.sim.get_state() self._get_reference() self.cur_time = 0 self.timestep = 0 self.done = False def _get_observation(self): """Returns an OrderedDict containing observations [(name_string, np.array), ...].""" return OrderedDict() def step(self, action): """Takes a step in simulation with control command @action.""" if self.done: raise ValueError("executing action in terminated episode") self.timestep += 1 self._pre_action(action) end_time = self.cur_time + self.control_timestep while self.cur_time < end_time: self.sim.step() self.cur_time += self.model_timestep reward, done, info = self._post_action(action) return self._get_observation(), reward, done, info def _pre_action(self, action): """Do any preprocessing before taking an action.""" self.sim.data.ctrl[:] = action def _post_action(self, action): """Do any housekeeping after taking an action.""" reward = self.reward(action) # done if number of elapsed timesteps is greater than horizon self.done = (self.timestep >= self.horizon) and not self.ignore_done return reward, self.done, {} def reward(self, action): """Reward should be a function of state and action.""" return 0 def render(self): """ Renders to an on-screen window. """ if self.viewer is None: self.has_renderer = True self.init_viewer() self.viewer.render() def observation_spec(self): """ Returns an observation as observation specification. An alternative design is to return an OrderedDict where the keys are the observation names and the values are the shapes of observations. We leave this alternative implementation commented out, as we find the current design is easier to use in practice. """ observation = self._get_observation() return observation # observation_spec = OrderedDict() # for k, v in observation.items(): # observation_spec[k] = v.shape # return observation_spec def action_spec(self): """ Action specification should be implemented in subclasses. Action space is represented by a tuple of (low, high), which are two numpy vectors that specify the min/max action limits per dimension. """ raise NotImplementedError def reset_from_xml_string(self, xml_string): """Reloads the environment from an XML description of the environment.""" # if there is an active viewer window, destroy it self.close() # load model from xml self.mjpy_model = load_model_from_xml(xml_string) self.sim = MjSim(self.mjpy_model) self.initialize_time(self.control_freq) if self.has_renderer and self.viewer is None: self.init_viewer() elif self.has_offscreen_renderer: render_context = MjRenderContextOffscreen(self.sim) render_context.vopt.geomgroup[ 0] = 1 if self.render_collision_mesh else 0 render_context.vopt.geomgroup[ 1] = 1 if self.render_visual_mesh else 0 self.sim.add_render_context(render_context) self.sim_state_initial = self.sim.get_state() self._get_reference() self.cur_time = 0 self.timestep = 0 self.done = False # necessary to refresh MjData self.sim.forward() def find_contacts(self, geoms_1, geoms_2): """ Finds contact between two geom groups. Args: geoms_1: a list of geom names (string) geoms_2: another list of geom names (string) Returns: iterator of all contacts between @geoms_1 and @geoms_2 """ for contact in self.sim.data.contact[0:self.sim.data.ncon]: # check contact geom in geoms c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1 c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2 # check contact geom in geoms (flipped) c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1 c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2 if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1): yield contact def _check_contact(self): """Returns True if gripper is in contact with an object.""" return False def _check_success(self): """ Returns True if task has been completed. """ return False def _destroy_viewer(self): # if there is an active viewer window, destroy it if self.viewer is not None: print('_destroy_viewer', self.viewer) self.viewer.close() # change this to viewer.finish()? self.viewer = None def close(self): """Do any cleanup necessary here.""" self._destroy_viewer()
<body name="box" pos="-0.8 0 0.2"> <geom mass="0.1" size="0.15 0.15 0.15" type="box"/> </body> <body name="floor" pos="0 0 0.025"> <geom condim="3" size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/> </body> </worldbody> <actuator> <motor gear="2000.0" joint="slide0"/> <motor gear="2000.0" joint="slide1"/> </actuator> </mujoco> """ model = load_model_from_xml(MODEL_XML) sim = MjSim(model) # viewer = MjViewer(sim) # t = 0 # while True: # sim.data.ctrl[0] = math.cos(t / 10.) * 0.01 # sim.data.ctrl[1] = math.sin(t / 10.) * 0.01 # t += 1 # sim.step() # viewer.render() # if t > 100 and os.getenv('TESTING') is not None: # break renderer = MjBatchRenderer(100, 100, use_cuda=True) renderer.render(sim) renderer.map() image = renderer.read()
# Initializing servos servo1 = Servo(coordinates=(0, 0, 0.5)) servo2 = Servo(coordinates=(0, 0, 0)) # servo3 = Servo(coordinates = (0, 0, 0)) # Joint connecting the servos servo2.attach_to_servo(servo1) # servo3.join_servo(servo2, 1) # Building the world worldbody = World_body() servo1_body = Servo_body(servo1) servo2_body = Servo_body(servo2) servo1_body.append_servo_body(servo2_body) worldbody.append_servo_body(servo1_body) build_xml_file(worldbody.root) TEST_XML = worldbody.get_root_str() # Building the model model = load_model_from_xml(TEST_XML) sim = MjSim(model) viewer = MjViewer(sim) t = 0 while True: t += 1 sim.step() viewer.render() if t > 100 and os.getenv('TESTING') is not None: break
def get_sim(seed): geom_type = ["box", "sphere"][seed % 2] model = load_model_from_xml(xml.format(geom_type=geom_type)) return MjSim(model)
from mujoco_py import load_model_from_path, MjSim, MjViewer, functions import mujoco_py.generated.const as const # Load the model as defined in: # https://github.com/deepmind/dm_control/blob/master/dm_control/suite/humanoid.xml MODEL_PATH = '../assets/examples/dm/humanoid.xml' model = load_model_from_path(MODEL_PATH) def render_callback(sim, viewer): pass sim = MjSim(model, render_callback=render_callback) viewer = MjViewer(sim) t = 0 while True: # All constants, including enum accessors are listed under: # https://github.com/openai/mujoco-py/blob/master/mujoco_py/generated/const.py # An MjSim instance has multiple properties. # See: https://github.com/openai/mujoco-py/blob/master/mujoco_py/mjsim.pyx # For a cython file, properties are listed as cdef expressions # sim.get_state() # Returns an MjSimState instance # See: https://github.com/openai/mujoco-py/blob/master/mujoco_py/mjsimstate.pyx # Has properties: time, qpos, qvel, act, uddstate. # These are generally just convenience properties pulled out of PyMjModel and PyMjData,
<motor gear="100" joint="right_hip" name="right_hip"/> <motor gear="200" joint="right_knee" name="right_knee"/> <motor gear="100" joint="left_hip" name="left_hip"/> <motor gear="200" joint="left_knee" name="left_knee"/> <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/> <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/> <motor gear="25" joint="right_elbow" name="right_elbow"/> <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/> <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/> <motor gear="25" joint="left_elbow" name="left_elbow"/> </actuator> </mujoco> """ model = load_model_from_xml(MODEL_XML) sim = MjSim(model) viewer = MjViewer(sim) x = np.load('results/k_tree.npy') sim_state = sim.get_state() print(sim_state.qpos) step = 0 while True: sim_state = sim.get_state() #for i in range(3, 7): # sim_state.qpos[i] = y[i] print(sim_state.qpos) for i in range(10, 24):
class BaseMujocoEnv(BaseEnv): def __init__(self, model_path, _hp): self._frame_height = _hp.viewer_image_height self._frame_width = _hp.viewer_image_width self._reset_sim(model_path) self._base_adim, self._base_sdim = None, None #state/action dimension of Mujoco control self._adim, self._sdim = None, None #state/action dimension presented to agent self.num_objects, self._n_joints = None, None self._goal_obj_pose = None self._goaldistances = [] self._ncam = _hp.ncam if self._ncam == 2: self.cameras = ['maincam', 'leftcam'] elif self._ncam == 1: self.cameras = ['maincam'] else: raise ValueError self._last_obs = None self._hp = _hp def _default_hparams(self): parent_params = super()._default_hparams() parent_params.add_hparam('viewer_image_height', 480) parent_params.add_hparam('viewer_image_width', 640) parent_params.add_hparam('ncam', 1) return parent_params def set_goal_obj_pose(self, pose): self._goal_obj_pose = pose def _reset_sim(self, model_path): """ Creates a MjSim from passed in model_path :param model_path: Absolute path to model file :return: None """ self._model_path = model_path self.sim = MjSim(load_model_from_path(self._model_path)) def reset(self): self._goaldistances = [] def render(self): """ Renders the enviornment. Implements custom rendering support. If mode is: - dual: renders both left and main cameras - left: renders only left camera - main: renders only main (front) camera :param mode: Mode to render with (dual by default) :return: uint8 numpy array with rendering from sim """ images = np.zeros( (self._ncam, self._frame_height, self._frame_width, 3), dtype=np.uint8) for i, cam in enumerate(self.cameras): images[i] = self.sim.render(self._frame_width, self._frame_height, camera_name=cam) return images def project_point(self, point, camera): model_matrix = np.zeros((4, 4)) model_matrix[:3, :3] = self.sim.data.get_camera_xmat(camera).T model_matrix[-1, -1] = 1 fovy_radians = np.deg2rad( self.sim.model.cam_fovy[self.sim.model.camera_name2id(camera)]) uh = 1. / np.tan(fovy_radians / 2) uw = uh / (self._frame_width / self._frame_height) extent = self.sim.model.stat.extent far, near = self.sim.model.vis.map.zfar * extent, self.sim.model.vis.map.znear * extent view_matrix = np.array([ [uw, 0., 0., 0.], # matrix definition from [ 0., uh, 0., 0. ], # https://stackoverflow.com/questions/18404890/how-to-build-perspective-projection-matrix-no-api [0., 0., far / (far - near), -1.], [0., 0., -2 * far * near / (far - near), 0.] ]) # Note Mujoco doubles this quantity MVP_matrix = view_matrix.dot(model_matrix) world_coord = np.ones((4, 1)) world_coord[:3, 0] = point - self.sim.data.get_camera_xpos(camera) clip = MVP_matrix.dot(world_coord) ndc = clip[:3] / clip[3] # everything should now be in -1 to 1!! col, row = (ndc[0] + 1) * self._frame_width / 2, ( -ndc[1] + 1) * self._frame_height / 2 return self._frame_height - row, col # rendering flipped around in height def get_desig_pix(self, target_width, round=True, obj_poses=None): qpos_dim = self._n_joints # the states contains pos and vel assert self.sim.data.qpos.shape[0] == qpos_dim + 7 * self.num_objects desig_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int) ratio = self._frame_width / target_width for icam, cam in enumerate(self.cameras): for i in range(self.num_objects): if obj_poses is None: fullpose = self.sim.data.qpos[i * 7 + qpos_dim:(i + 1) * 7 + qpos_dim].squeeze() chosen_point = fullpose[:3] else: chosen_point = obj_poses[i, :3] d = self.project_point(chosen_point, cam) d = np.stack(d) / ratio if round: d = np.around(d).astype(np.int) desig_pix[icam, i] = d.squeeze() return desig_pix def get_goal_pix(self, target_width, round=True): goal_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int) ratio = self._frame_width / target_width for icam, cam in enumerate(self.cameras): for i in range(self.num_objects): g = self.project_point(self._goal_obj_pose[i, :3], cam) g = np.stack(g) / ratio if round: g = np.around(g).astype(np.int) goal_pix[icam, i] = g.squeeze() return goal_pix def eval(self, target_width=None, save_dir=None, ntasks=None): self._goaldistances.append(self.get_distance_score()) stats = {} stats['improvement'] = self._goaldistances[0] - self._goaldistances[-1] stats['initial_dist'] = self._goaldistances[0] stats['final_dist'] = self._goaldistances[-1] return stats def get_distance_score(self): """ :return: mean of the distances between all objects and goals """ abs_distances = [] for i_ob in range(self.num_objects): goal_pos = self._goal_obj_pose[i_ob, :3] curr_pos = self.sim.data.qpos[self._n_joints + i_ob * 7:self._n_joints + 3 + i_ob * 7] abs_distances.append(np.linalg.norm(goal_pos - curr_pos)) return np.mean(np.array(abs_distances)) def snapshot_noarm(self): raise NotImplementedError @property def adim(self): return self._adim @property def sdim(self): return self._sdim @property def ncam(self): return self._ncam
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
class Dribble_Env(object): def __init__(self): self.model = load_model_from_path("./xml/world5.xml") self.sim = MjSim(self.model) # self.viewer = MyMjViewer(self.sim) self.viewer = MyMjViewer(self.sim) self.max_vel = [-1000,1000] self.x_motor = 0 self.y_motor = 0 self.date_time = datetime.datetime.now() self.path = "./datas/path_date" + str(self.date_time.strftime("_%Y%m%d_%H%M%S")) os.mkdir(self.path) def step(self,action): self.x_motor = ((action %3)-1) * 200 self.y_motor = ((action//3)-1) * 200 self.sim.data.ctrl[0] = self.x_motor self.sim.data.ctrl[1] = self.y_motor self.sim.step() def get_state(self): robot_x, robot_y = self.sim.data.body_xpos[1][0:2] robot_xv, robot_yv = self.sim.data.qvel[0:2] ball_x, ball_y = self.sim.data.body_xpos[2][0:2] ball_xv, ball_yv = self.sim.data.qvel[2:4] ball_pos_local = -(robot_x - ball_x), -(robot_y - ball_y) object1_x, object1_y= self.sim.data.body_xpos[3][0:2] object2_x, object2_y= self.sim.data.body_xpos[4][0:2] object3_x, object3_y= self.sim.data.body_xpos[5][0:2] object4_x, object4_y= self.sim.data.body_xpos[6][0:2] closest_object_id = np.argmin([np.linalg.norm([object1_x - robot_x, object1_y - robot_y]),\ np.linalg.norm([object2_x - robot_x, object2_y - robot_y]),\ np.linalg.norm([object3_x - robot_x, object3_y - robot_y]),\ np.linalg.norm([object4_x - robot_x, object4_y - robot_y])]) object_local_x = -(robot_x - self.sim.data.body_xpos[closest_object_id+3][0]) object_local_y = -(robot_y - self.sim.data.body_xpos[closest_object_id+3][1]) return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \ robot_xv, robot_yv,object_local_x,object_local_y,ball_x, ball_y,ball_xv,ball_yv] def check_done(self): ball_x ,ball_y = self.get_state()[8:10] if ball_x > 80 and -25 < ball_y < 25: return True else: return False def check_wall(self): ball_x, ball_y = self.get_state()[8:10] if math.fabs(ball_y) > 51: return True elif ball_x > 81 and math.fabs(ball_y) > 25: return True else: return False def check_avoidaince(self,object_num=4): for i in range(object_num): if math.fabs(self.sim.data.qvel[5+i*3]) > 0.1 or math.fabs(self.sim.data.qvel[6+3*i]) > 0.1: return True return False def reset(self): self.x_motor = 0 self.y_motor = 0 self.robot_x_data = [] self.robot_y_data = [] self.ball_x_data = [] self.ball_y_data = [] self.sim.reset() # self.sim.data.qpos[0] = np.random.randint(-3,3) self.sim.data.qpos[1] = np.random.randint(-5,5) def render(self): self.viewer.render() def plot_data(self,step,t,done,episode,flag,reward): self.field_x = [-90,-90,90,90,-90] self.field_y = [-60,60,60,-60,-60] self.robot_x_data.append(self.sim.data.body_xpos[1][0]) self.robot_y_data.append(self.sim.data.body_xpos[1][1]) self.ball_x_data.append(self.sim.data.body_xpos[2][0]) self.ball_y_data.append(self.sim.data.body_xpos[2][1]) datas = str(self.robot_x_data[-1])+" "+str(self.robot_y_data[-1])+" "+str(self.ball_x_data[-1])+" "+str(self.ball_y_data[-1])+" "+str(reward) with open(self.path + '/plotdata_' + str(episode+1).zfill(4)+ '.txt','a') as f: f.write(str(datas)+'\n') if (t >= step-1 or done) and flag: fig1 = plt.figure() plt.ion() plt.show() plt.plot(self.ball_x_data,self.ball_y_data,marker='o',markersize=2,color="red",label="ball") plt.plot(self.robot_x_data,self.robot_y_data,marker="o",markersize=2,color='blue',label="robot") plt.plot(self.sim.data.body_xpos[3][0],self.sim.data.body_xpos[3][1],marker="o",markersize=8,color='black') plt.plot(self.sim.data.body_xpos[4][0],self.sim.data.body_xpos[4][1],marker="o",markersize=8,color='black') plt.plot(self.sim.data.body_xpos[5][0],self.sim.data.body_xpos[5][1],marker="o",markersize=8,color='black') plt.plot(self.sim.data.body_xpos[6][0],self.sim.data.body_xpos[6][1],marker="o",markersize=8,color='black') plt.plot(self.field_x,self.field_y,markersize=1,color="black") plt.plot(80,0,marker="X",color="green",label="goal") plt.legend(loc="lower right") plt.draw() plt.pause(0.001) plt.close(1)
def test_inverse_kinematics(): panda_kinematics = ikpy_panda_kinematics.panda_kinematics() model = load_model_from_path("robot.xml") sim = MjSim(model) viewer = MjViewer(sim) timestep = generatePatternedTrajectories.TIMESTEP control_freq = 1/timestep total_time = 3 num_cycles = int(total_time * control_freq) qd_init = np.zeros(7) plt.ion() LW = 1.0 fig = plt.figure(figsize=(4,15)) axes = [] lines = [] goals = [] min_vals = {'x': -0.5, 'y': -0.5, 'z': 0.2} max_vals = {'x': 0.5, 'y': 0.5, 'z': 0.7} ylabels = ["x(m)", "y(m)", "z(m)"] ylbounds = [min_vals['x'], min_vals['y'], min_vals['z']] yubounds = [max_vals['x'], max_vals['y'], max_vals['z']] for i in range(3): axes.append(fig.add_subplot(3,1,i+1)) lines.append(axes[i].plot([],[],'b-', lw=LW)[0]) goals.append(axes[i].plot([],[],'r-', lw=LW)[0]) axes[i].set_ylim([ylbounds[i], yubounds[i]]) axes[i].set_xlim([0,total_time]) axes[i].set_ylabel(ylabels[i], fontsize=8) axes[i].set_xlabel("Time(s)", fontsize=8) for test in range(5): x_target = np.random.rand() * (max_vals['x'] - min_vals['x']) + min_vals['x'] y_target = np.random.rand() * (max_vals['y'] - min_vals['y']) + min_vals['y'] z_target = np.random.rand() * (max_vals['z'] - min_vals['z']) + min_vals['z'] if\ min_vals['x'] > x_target or max_vals['x'] < x_target or \ min_vals['y'] > y_target or max_vals['y'] < y_target or \ min_vals['z'] > z_target or max_vals['z'] < z_target: print("Error! 'xpos' out of range!") print("x = %.3f\ny = %.3f\nz = %.3f" % (x_target, y_target, z_target)) print(max_vals['y'] - min_vals['y']) return # x_target, y_target, z_target = 0.088, -0.0, 0.926 # roll = np.random.rand() * np.pi - np.pi/2 # pitch = np.random.rand() * np.pi - np.pi/2 # yaw = np.random.rand() * np.pi - np.pi/2 roll = 0 pitch = 0 yaw = np.pi ee_goal = [x_target, y_target, z_target, roll, pitch, yaw] # temp = bounds.getRandPosInBounds() # ee_goal = panda_kinematics.forward_kinematics(temp) q_init = bounds.getRandPosInBounds() q_goal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q_init) for g in range(3): goals[g].set_ydata(np.ones(num_cycles) * ee_goal[g]) goals[g].set_xdata(np.linspace(0,3,num_cycles)) sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={})) sim.step() sim_time = 0 for i in range(num_cycles): state = sim.get_state() q = state[1] qd = state[2] sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=q_goal) sim.step() viewer.render() if i % 70 == 0: xpos = panda_kinematics.forward_kinematics(q) for a in range(3): lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time)) lines[a].set_ydata(np.append(lines[a].get_ydata(), xpos[a])) fig.canvas.draw() fig.canvas.flush_events() print("[q\t]:{}".format(np.around(q,3))) print("[qgoal\t]:{}".format(np.around(q_goal,3))) print("[qgoal2\t]:{}".format(np.around(panda_kinematics.inverse_kinematics(ee_goal[0:3], ee_goal[3:6]),3))) print("[EE\t]:{}".format(np.around(panda_kinematics.forward_kinematics(q),3))) print("[EEgoal\t]:{}".format(np.around(ee_goal,3))) sim_time += timestep # panda_kinematics.plot_stick_figure(q) for i in range(3): lines[i].set_xdata([]) lines[i].set_ydata([]) time.sleep(1)
def __init__(self, model_name, goal_space_train, goal_space_test, project_state_to_end_goal, end_goal_thresholds, initial_state_space, subgoal_bounds, project_state_to_subgoal, subgoal_thresholds, max_actions=1200, num_frames_skip=10, show=False): self.name = model_name # Create Mujoco Simulation self.model = load_model_from_path("./mujoco_files/" + model_name) self.sim = MjSim(self.model) # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks if model_name == "pendulum.xml": self.state_dim = 2 * len(self.sim.data.qpos) + len( self.sim.data.qvel) else: self.state_dim = len(self.sim.data.qpos) + len( self.sim.data.qvel ) # State will include (i) joint angles and (ii) joint velocities self.action_dim = len( self.sim.model.actuator_ctrlrange) # low-level action dim self.action_bounds = self.sim.model.actuator_ctrlrange[:, 1] # low-level action bounds self.action_offset = np.zeros((len( self.action_bounds))) # Assumes symmetric low-level action ranges self.end_goal_dim = len(goal_space_test) self.subgoal_dim = len(subgoal_bounds) self.subgoal_bounds = subgoal_bounds # Projection functions self.project_state_to_end_goal = project_state_to_end_goal self.project_state_to_subgoal = project_state_to_subgoal # Convert subgoal bounds to symmetric bounds and offset. Need these to properly configure subgoal actor networks self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds))) self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds))) for i in range(len(self.subgoal_bounds)): self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2 self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][ 1] - self.subgoal_bounds_symmetric[i] # End goal/subgoal thresholds self.end_goal_thresholds = end_goal_thresholds self.subgoal_thresholds = subgoal_thresholds # Set inital state and goal state spaces self.initial_state_space = initial_state_space self.goal_space_train = goal_space_train self.goal_space_test = goal_space_test self.subgoal_colors = [ "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon", "Gray", "White", "Black" ] self.max_actions = max_actions # Implement visualization if necessary self.visualize = show # Visualization boolean if self.visualize: self.viewer = MjViewer(self.sim) self.num_frames_skip = num_frames_skip
#!/usr/bin/env python3 """ Shows how to toss a capsule to a container. """ from mujoco_py import load_model_from_path, MjSim, MjViewer import os import scipy.misc import cv2 import matplotlib.pyplot as plt import numpy as np model = load_model_from_path("model.xml") sim = MjSim(model) viewer = MjViewer(sim) sim_state = sim.get_state() count = 1 def get_image_ball_center(image_src): src = image_src im_width = image_src.shape[0] im_height = image_src.shape[1] #grayimg = np.zeros((im_width,im_height),dtype=np.int8) pixcount = 0 pixwx = 0.0 pixwy = 0.0 #=================================================================== grayimg = np.zeros((256, 256), dtype=np.int8) for i in range(0, 256):
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not os.path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self.buffer_size = (1600, 1280) self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60, } self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if ( type(observation) is tuple) else observation.size bounds = self.model.actuator_ctrlrange.copy() low, high = bounds[:, 0], bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) self.observation_space = spaces.Box(-high, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ------------------------------------------------------------------------ def reset_model(self): """Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def viewer_setup(self): """Called when the viewer is initialized and after every reset. Optionally implement this method, if you need to tinker with camera position and so forth. """ pass # ------------------------------------------------------------------------ def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) assert qvel.shape == (self.model.nv, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() def _render(self, mode='human', close=False): if close: if self.viewer is not None: self.viewer = None return if mode == 'rgb_array': self.viewer_setup() return _read_pixels(self.sim, *self.buffer_size) elif mode == 'human': self._get_viewer().render() def _get_viewer(self, mode='human'): if self.viewer is None and mode == 'human': self.viewer = MjViewer(self.sim) self.viewer_setup() return self.viewer def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat])
def main(): parser = argparse.ArgumentParser() parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)') parser.add_argument('--robot_file', default='../../xml/simrobot/7dof/robot_mocap.xml', type=str, help='path to robot config') parser.add_argument('--save_dir', default='../../xml/gen_xmls/simrobot/reacher', type=str, help='path to save initial joint poses') print('Program starts at: \033[92m %s \033[0m' % datetime.now().strftime("%Y-%m-%d %H:%M")) args = parser.parse_args() np.random.seed(args.seed) filename = 'poses.json' robot_file = args.robot_file save_dir = args.save_dir ini_bds = np.array([[0.3, 0.6], [-0.2, 0.2], [0.5, 0.7]]) tgt_bds = np.array([[0.3, 0.6], [-0.3, 0.3], [-0.1, 0.3]]) initial_num = [4, 5, 3] target_num = [4, 7, 5] target_state = np.mgrid[tgt_bds[0][0]:tgt_bds[0][1]:complex(target_num[0]), tgt_bds[1][0]:tgt_bds[1][1]:complex(target_num[1]), tgt_bds[2][0]:tgt_bds[2][1]:complex(target_num[2])] target_state = target_state.reshape(3, -1).T initial_state = np.mgrid[ ini_bds[0][0]:ini_bds[0][1]:complex(initial_num[0]), ini_bds[1][0]:ini_bds[1][1]:complex(initial_num[1]), ini_bds[2][0]:ini_bds[2][1]:complex(initial_num[2])] initial_state = initial_state.reshape(3, -1).T np.random.shuffle(target_state) np.random.shuffle(initial_state) assert os.path.exists(robot_file) model = load_model_from_path(robot_file) sim = MjSim(model, nsubsteps=40) sim.reset() sim.step() site_xpos_cur = sim.data.site_xpos[0] print('site xpos:', site_xpos_cur) init_joint_angles = [] goal_poses = [] for idx in tqdm(range(initial_state.shape[0])): sim.reset() sim.step() site_xpos_target = initial_state[idx] delta = site_xpos_target - site_xpos_cur sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta for i in range(30): sim.step() dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target) if dist < 0.002: joint_angle = sim.data.qpos[:7] init_joint_angles.append(joint_angle.tolist()) break for idx in tqdm(range(target_state.shape[0])): sim.reset() sim.step() site_xpos_target = target_state[idx] delta = site_xpos_target - site_xpos_cur sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta for i in range(30): sim.step() dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target) if dist < 0.002: goal_poses.append(site_xpos_target.tolist()) break print('valid initial pose num: ', len(init_joint_angles)) print('valid goal pose num: ', len(goal_poses)) data = {'initial_joint_angles': init_joint_angles, 'ee_goal': goal_poses} os.makedirs(save_dir, exist_ok=True) save_file = os.path.join(save_dir, filename) print('saving to: ', save_file) with open(save_file, 'w') as f: json.dump(data, f, indent=2) print('Program ends at: \033[92m %s \033[0m' % datetime.now().strftime("%Y-%m-%d %H:%M"))
class Environment(): def __init__(self, model_name, goal_space_train, goal_space_test, project_state_to_end_goal, end_goal_thresholds, initial_state_space, subgoal_bounds, project_state_to_subgoal, subgoal_thresholds, max_actions=1200, num_frames_skip=10, show=False): self.name = model_name # Create Mujoco Simulation self.model = load_model_from_path("./mujoco_files/" + model_name) self.sim = MjSim(self.model) # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks if model_name == "pendulum.xml": self.state_dim = 2 * len(self.sim.data.qpos) + len( self.sim.data.qvel) else: self.state_dim = len(self.sim.data.qpos) + len( self.sim.data.qvel ) # State will include (i) joint angles and (ii) joint velocities self.action_dim = len( self.sim.model.actuator_ctrlrange) # low-level action dim self.action_bounds = self.sim.model.actuator_ctrlrange[:, 1] # low-level action bounds self.action_offset = np.zeros((len( self.action_bounds))) # Assumes symmetric low-level action ranges self.end_goal_dim = len(goal_space_test) self.subgoal_dim = len(subgoal_bounds) self.subgoal_bounds = subgoal_bounds # Projection functions self.project_state_to_end_goal = project_state_to_end_goal self.project_state_to_subgoal = project_state_to_subgoal # Convert subgoal bounds to symmetric bounds and offset. Need these to properly configure subgoal actor networks self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds))) self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds))) for i in range(len(self.subgoal_bounds)): self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2 self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][ 1] - self.subgoal_bounds_symmetric[i] # End goal/subgoal thresholds self.end_goal_thresholds = end_goal_thresholds self.subgoal_thresholds = subgoal_thresholds # Set inital state and goal state spaces self.initial_state_space = initial_state_space self.goal_space_train = goal_space_train self.goal_space_test = goal_space_test self.subgoal_colors = [ "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon", "Gray", "White", "Black" ] self.max_actions = max_actions # Implement visualization if necessary self.visualize = show # Visualization boolean if self.visualize: self.viewer = MjViewer(self.sim) self.num_frames_skip = num_frames_skip # Get state, which concatenates joint positions and velocities def get_state(self): if self.name == "pendulum.xml": return np.concatenate([ np.cos(self.sim.data.qpos), np.sin(self.sim.data.qpos), self.sim.data.qvel ]) else: return np.concatenate((self.sim.data.qpos, self.sim.data.qvel)) # Reset simulation to state within initial state specified by user def reset_sim(self, next_goal=None): # Reset controls self.sim.data.ctrl[:] = 0 if self.name == "ant_reacher.xml": while True: # Reset joint positions and velocities for i in range(len(self.sim.data.qpos)): self.sim.data.qpos[i] = np.random.uniform( self.initial_state_space[i][0], self.initial_state_space[i][1]) for i in range(len(self.sim.data.qvel)): self.sim.data.qvel[i] = np.random.uniform( self.initial_state_space[len(self.sim.data.qpos) + i][0], self.initial_state_space[len(self.sim.data.qpos) + i][1]) # Ensure initial ant position is more than min_dist away from goal min_dist = 8 if np.linalg.norm(next_goal[:2] - self.sim.data.qpos[:2]) > min_dist: break elif self.name == "ant_four_rooms.xml": # Choose initial start state to be different than room containing the end goal # Determine which of four rooms contains goal goal_room = 0 if next_goal[0] < 0 and next_goal[1] > 0: goal_room = 1 elif next_goal[0] < 0 and next_goal[1] < 0: goal_room = 2 elif next_goal[0] > 0 and next_goal[1] < 0: goal_room = 3 # Place ant in room different than room containing goal # initial_room = (goal_room + 2) % 4 initial_room = np.random.randint(0, 4) while initial_room == goal_room: initial_room = np.random.randint(0, 4) # Set initial joint positions and velocities for i in range(len(self.sim.data.qpos)): self.sim.data.qpos[i] = np.random.uniform( self.initial_state_space[i][0], self.initial_state_space[i][1]) for i in range(len(self.sim.data.qvel)): self.sim.data.qvel[i] = np.random.uniform( self.initial_state_space[len(self.sim.data.qpos) + i][0], self.initial_state_space[len(self.sim.data.qpos) + i][1]) # Move ant to correct room self.sim.data.qpos[0] = np.random.uniform(3, 6.5) self.sim.data.qpos[1] = np.random.uniform(3, 6.5) # If goal should be in top left quadrant if initial_room == 1: self.sim.data.qpos[0] *= -1 # Else if goal should be in bottom left quadrant elif initial_room == 2: self.sim.data.qpos[0] *= -1 self.sim.data.qpos[1] *= -1 # Else if goal should be in bottom right quadrant elif initial_room == 3: self.sim.data.qpos[1] *= -1 # print("Goal Room: %d" % goal_room) # print("Initial Ant Room: %d" % initial_room) else: # Reset joint positions and velocities for i in range(len(self.sim.data.qpos)): self.sim.data.qpos[i] = np.random.uniform( self.initial_state_space[i][0], self.initial_state_space[i][1]) for i in range(len(self.sim.data.qvel)): self.sim.data.qvel[i] = np.random.uniform( self.initial_state_space[len(self.sim.data.qpos) + i][0], self.initial_state_space[len(self.sim.data.qpos) + i][1]) self.sim.step() # Return state return self.get_state() # Execute low-level action for number of frames specified by num_frames_skip def execute_action(self, action): self.sim.data.ctrl[:] = action for _ in range(self.num_frames_skip): self.sim.step() if self.visualize: self.viewer.render() return self.get_state() # Visualize end goal. This function may need to be adjusted for new environments. def display_end_goal(self, end_goal): # Goal can be visualized by changing the location of the relevant site object. if self.name == "pendulum.xml": self.sim.data.mocap_pos[0] = np.array([ 0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6 ]) elif self.name == "ur5.xml": theta_1 = end_goal[0] theta_2 = end_goal[1] theta_3 = end_goal[2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) # Determine joint position relative to original reference frame # shoulder_pos = T_1_0.dot(shoulder_pos_1) upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3] forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos] """ print("\nEnd Goal Joint Pos: ") print("Upper Arm Pos: ", joint_pos[0]) print("Forearm Pos: ", joint_pos[1]) print("Wrist Pos: ", joint_pos[2]) """ for i in range(3): self.sim.data.mocap_pos[i] = joint_pos[i] elif self.name == "ant_reacher.xml" or self.name == "ant_four_rooms.xml": self.sim.data.mocap_pos[0][:3] = np.copy(end_goal[:3]) else: assert False, "Provide display end goal function in environment.py file" # Function returns an end goal def get_next_goal(self, test): end_goal = np.zeros((len(self.goal_space_test))) if self.name == "ur5.xml": goal_possible = False while not goal_possible: end_goal = np.zeros(shape=(self.end_goal_dim, )) end_goal[0] = np.random.uniform(self.goal_space_test[0][0], self.goal_space_test[0][1]) end_goal[1] = np.random.uniform(self.goal_space_test[1][0], self.goal_space_test[1][1]) end_goal[2] = np.random.uniform(self.goal_space_test[2][0], self.goal_space_test[2][1]) # Next need to ensure chosen joint angles result in achievable task (i.e., desired end effector position is above ground) theta_1 = end_goal[0] theta_2 = end_goal[1] theta_3 = end_goal[2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot( forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] # Make sure wrist 1 pos is above ground so can actually be reached if np.absolute(end_goal[0]) > np.pi / 4 and forearm_pos[ 2] > 0.05 and wrist_1_pos[2] > 0.15: goal_possible = True elif self.name == "ant_four_rooms.xml": # Randomly select one of the four rooms in which the goal will be located room_num = np.random.randint(0, 4) # Pick exact goal location end_goal[0] = np.random.uniform(3, 6.5) end_goal[1] = np.random.uniform(3, 6.5) end_goal[2] = np.random.uniform(0.45, 0.55) # If goal should be in top left quadrant if room_num == 1: end_goal[0] *= -1 # Else if goal should be in bottom left quadrant elif room_num == 2: end_goal[0] *= -1 end_goal[1] *= -1 # Else if goal should be in bottom right quadrant elif room_num == 3: end_goal[1] *= -1 elif not test and self.goal_space_train is not None: for i in range(len(self.goal_space_train)): end_goal[i] = np.random.uniform(self.goal_space_train[i][0], self.goal_space_train[i][1]) else: assert self.goal_space_test is not None, "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file" for i in range(len(self.goal_space_test)): end_goal[i] = np.random.uniform(self.goal_space_test[i][0], self.goal_space_test[i][1]) # Visualize End Goal self.display_end_goal(end_goal) return end_goal # Visualize all subgoals def display_subgoals(self, subgoals): # Display up to 10 subgoals and end goal if len(subgoals) <= 11: subgoal_ind = 0 else: subgoal_ind = len(subgoals) - 11 for i in range(1, min(len(subgoals), 11)): if self.name == "pendulum.xml": self.sim.data.mocap_pos[i] = np.array([ 0.5 * np.sin(subgoals[subgoal_ind][0]), 0, 0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6 ]) # Visualize subgoal self.sim.model.site_rgba[i][3] = 1 subgoal_ind += 1 elif self.name == "ur5.xml": theta_1 = subgoals[subgoal_ind][0] theta_2 = subgoals[subgoal_ind][1] theta_3 = subgoals[subgoal_ind][2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) # Determine joint position relative to original reference frame # shoulder_pos = T_1_0.dot(shoulder_pos_1) upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3] forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot( forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos] """ print("\nSubgoal %d Joint Pos: " % i) print("Upper Arm Pos: ", joint_pos[0]) print("Forearm Pos: ", joint_pos[1]) print("Wrist Pos: ", joint_pos[2]) """ # Designate site position for upper arm, forearm and wrist for j in range(3): self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy( joint_pos[j]) self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1 # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3]) subgoal_ind += 1 elif self.name == "ant_reacher.xml" or self.name == "ant_four_rooms.xml": self.sim.data.mocap_pos[i][:3] = np.copy( subgoals[subgoal_ind][:3]) self.sim.model.site_rgba[i][3] = 1 subgoal_ind += 1 else: # Visualize desired gripper position, which is elements 18-21 in subgoal vector self.sim.data.mocap_pos[i] = subgoals[subgoal_ind] # Visualize subgoal self.sim.model.site_rgba[i][3] = 1 subgoal_ind += 1
def train_POMDP(self): args = self.args ROOT_DIR = os.path.dirname(os.path.dirname( os.path.abspath(__file__))) # corl2019 PARENT_DIR = os.path.dirname(ROOT_DIR) # reserach # Create the output directory if it does not exist output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp', args.output_dir) if not os.path.isdir(output_dir): os.makedirs(output_dir) # write args to file with open(os.path.join(output_dir, 'args.txt'), 'w+') as f: json.dump(args.__dict__, f, indent=2) f.close() # Create our policy net and a target net self.policy_net_1 = DRQN(args.indim, args.outdim).to(args.device) self.policy_net_2 = DRQN(args.indim, args.outdim).to(args.device) self.policy_net_3 = DRQN(args.indim, args.outdim).to(args.device) # Set up the optimizer self.optimizer_1 = optim.RMSprop(self.policy_net_1.parameters()) self.optimizer_2 = optim.RMSprop(self.policy_net_2.parameters()) self.optimizer_3 = optim.RMSprop(self.policy_net_3.parameters()) self.memory = RecurrentMemory(800) self.steps_done = 0 # Setup the state normalizer normalizer = Multimodal_Normalizer(num_inputs=args.indim, device=args.device) print_variables = {'durations': [], 'rewards': [], 'loss': []} start_episode = 0 if args.checkpoint_file: if os.path.exists(args.checkpoint_file): checkpoint = torch.load(args.checkpoint_file) self.policy_net_1.load_state_dict(checkpoint['policy_net_1']) self.policy_net_2.load_state_dict(checkpoint['policy_net_2']) self.policy_net_3.load_state_dict(checkpoint['policy_net_3']) self.optimizer_1.load_state_dict(checkpoint['optimizer_1']) self.optimizer_2.load_state_dict(checkpoint['optimizer_2']) self.optimizer_3.load_state_dict(checkpoint['optimizer_3']) start_episode = checkpoint['epochs'] self.steps_done = checkpoint['steps_done'] with open( os.path.join(os.path.dirname(args.checkpoint_file), 'results_pomdp.pkl'), 'rb') as file: plot_dict = pickle.load(file) print_variables['durations'] = plot_dict['durations'] print_variables['rewards'] = plot_dict['rewards'] if args.normalizer_file: if os.path.exists(args.normalizer_file): normalizer.restore_state(args.normalizer_file) if args.memory: if os.path.exists(args.memory): self.memory.load(args.memory) action_space = ActionSpace(dp=0.06, df=10) # Create robot, reset simulation and grasp handle model = load_model_from_path(args.model_path) sim = MjSim(model) sim_param = SimParameter(sim) sim.step() if args.render: viewer = MjViewer(sim) else: viewer = None robot = RobotSim(sim, viewer, sim_param, args.render, self.break_threshold) # Main training loop for ii in range(start_episode, args.epochs): start_time = time.time() self.steps_done += 1 act_sequence = [] if args.sim: sim_params = init_model(robot.mj_sim) robot.reset_simulation() ret = robot.grasp_handle() if not ret: continue # Local memory for current episode localMemory = [] # Get current observation hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states( batch_size=1, device=args.device) hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states( batch_size=1, device=args.device) hidden_state_3, cell_state_3 = self.policy_net_3.init_hidden_states( batch_size=1, device=args.device) observation_space = TactileObs( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30 x 7 broken_so_far = 0 for t in count(): if not args.quiet and t % 50 == 0: print("Running training episode: {}, iteration: {}".format( ii, t)) # Select action observation = observation_space.get_state() if args.position: observation = observation[6:] if args.shear: indices = np.ones(len(observation), dtype=bool) indices[6:166] = False observation = observation[indices] if args.force: observation = observation[:166] normalizer.observe(observation) observation = normalizer.normalize(observation) action, hidden_state_1, cell_state_1 = self.select_action( observation, hidden_state_1, cell_state_1) # record actions in this epoch act_sequence.append(action) # Perform action delta = action_space.get_action( self.ACTIONS[action])['delta'][:3] target_position = np.add(robot.get_gripper_jpos()[:3], np.array(delta)) target_pose = np.hstack( (target_position, robot.get_gripper_jpos()[3:])) if args.sim: robot.move_joint(target_pose, True, self.gripping_force, hap_sample=args.hap_sample) # Get reward done, num = robot.update_tendons() failure = robot.check_slippage() if num > broken_so_far: reward = num - broken_so_far broken_so_far = num else: reward = 0 # # Add a movement reward # reward -= 0.05 * np.linalg.norm(target_position - robot.get_gripper_jpos()[:3]) / np.linalg.norm(delta) # Observe new state observation_space.update( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30x7 # Set max number of iterations if t >= self.max_iter: done = True # Check if done if not done and not failure: next_state = observation_space.get_state() if args.position: next_state = next_state[6:] if args.shear: indices = np.ones(len(next_state), dtype=bool) indices[6:166] = False next_state = next_state[indices] if args.force: next_state = next_state[:166] normalizer.observe(next_state) next_state = normalizer.normalize(next_state) else: next_state = None # Push new Transition into memory localMemory.append( Transition(observation, action, next_state, reward)) # Optimize the model if t % 10 == 0: loss = self.optimize_model() # if loss: # print_variables['loss'].append(loss.item()) # If we are done, reset the model if done or failure: self.memory.push(localMemory) if failure: print_variables['durations'].append(self.max_iter) else: print_variables['durations'].append(t) print_variables['rewards'].append(broken_so_far) plot_variables(self.figure, print_variables, "Training POMDP") print("Model parameters: {}".format(sim_params)) print("Actions in this epoch are: {}".format(act_sequence)) print("Epoch {} took {}s, total number broken: {}\n\n". format(ii, time.time() - start_time, broken_so_far)) break # Save checkpoints every vew iterations if ii % args.save_freq == 0: save_path = os.path.join( output_dir, 'checkpoint_model_' + str(ii) + '.pth') torch.save( { 'epochs': ii, 'steps_done': self.steps_done, 'policy_net_1': self.policy_net_1.state_dict(), 'policy_net_2': self.policy_net_2.state_dict(), 'policy_net_3': self.policy_net_3.state_dict(), 'optimizer_1': self.optimizer_1.state_dict(), 'optimizer_2': self.optimizer_2.state_dict(), 'optimizer_3': self.optimizer_3.state_dict(), }, save_path) self.memory.save_memory(os.path.join(output_dir, 'memory.pickle')) if args.savefig_path: now = dt.datetime.now() self.figure[0].savefig( args.savefig_path + '{}_{}_{}.png'.format(now.month, now.day, now.hour), format='png') print('Training done') plt.show() return print_variables
def get_sim(seed): model = load_model_from_mjb(pattern) return MjSim(model)
class BaseEnv: def __init__( self, robot_folders, robot_dir, substeps, tol=0.02, train=True, with_kin=None, with_dyn=None, multi_goal=False, ): self.with_kin = with_kin self.with_dyn = with_dyn self.multi_goal = multi_goal self.goal_dim = 3 if self.with_dyn: norm_file = os.path.join(robot_dir, 'stats/dyn_stats.json') with open(norm_file, 'r') as f: stats = json.load(f) self.dyn_mu = np.array(stats['mu']).reshape(-1) self.dyn_sigma = np.array(stats['sigma']).reshape(-1) self.dyn_min = np.array(stats['min']).reshape(-1) self.dyn_max = np.array(stats['max']).reshape(-1) self.nsubsteps = substeps self.metadata = {} self.reward_range = (-np.inf, np.inf) self.spec = None self.dist_tol = tol self.viewer = None self.links = [ 'gl0', 'gl1_1', 'gl1_2', 'gl2', 'gl3_1', 'gl3_2', 'gl4', 'gl5_1', 'gl5_2', 'gl6' ] self.bodies = [ "connector_plate_base", "electric_gripper_base", "gripper_l_finger", "gripper_l_finger_tip", "gripper_r_finger", "gripper_r_finger_tip", "l0", "l1", "l2", "l3", "l4", "l5", "l6" ] self.site_set = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6'] self.joint_set = self.site_set self.robots = [] for folder in robot_folders: self.robots.append(os.path.join(robot_dir, folder)) self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)} self.robot_num = len(self.robots) if train: self.test_robot_num = min(100, self.robot_num) self.train_robot_num = self.robot_num - self.test_robot_num self.test_robot_ids = list( range(self.train_robot_num, self.robot_num)) self.train_test_robot_num = min(100, self.train_robot_num) self.train_test_robot_ids = list(range(self.train_test_robot_num)) self.train_test_conditions = self.train_test_robot_num else: self.test_robot_num = self.robot_num self.train_robot_num = self.robot_num - self.test_robot_num self.test_robot_ids = list(range(self.robot_num)) self.test_conditions = self.test_robot_num print('Train robots: ', self.train_robot_num) print('Test robots: ', self.test_robot_num) print('Multi goal:', self.multi_goal) self.reset_robot(0) self.ob_dim = self.get_obs()[0].size print('Ob dim:', self.ob_dim) high = np.inf * np.ones(self.ob_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.ep_reward = 0 self.ep_len = 0 def reset(self, robot_id=None): raise NotImplementedError def step(self, action): raise NotImplementedError def update_action_space(self): actuators = self.sim.model._actuator_name2id.keys() valid_joints = [ac for ac in actuators if ac in self.joint_set] self.act_dim = len(valid_joints) bounds = self.sim.model.actuator_ctrlrange[:self.act_dim] self.ctrl_low = np.copy(bounds[:, 0]) self.ctrl_high = np.copy(bounds[:, 1]) self.action_space = spaces.Box(self.ctrl_low, self.ctrl_high, dtype=np.float32) def scale_action(self, action): act_k = (self.action_space.high - self.action_space.low) / 2. act_b = (self.action_space.high + self.action_space.low) / 2. return act_k * action + act_b def reset_robot(self, robot_id): self.robot_folder_id = self.dir2id[self.robots[robot_id]] robot_file = os.path.join(self.robots[robot_id], 'robot.xml') self.model = load_model_from_path(robot_file) self.sim = MjSim(self.model, nsubsteps=self.nsubsteps) self.update_action_space() self.sim.reset() self.sim.step() if self.viewer is not None: self.viewer = MjViewer(self.sim) def test_reset(self, cond): robot_id = self.test_robot_ids[cond] return self.reset(robot_id=robot_id) def train_test_reset(self, cond): robot_id = self.train_test_robot_ids[cond] return self.reset(robot_id=robot_id) def cal_reward(self, s, goal, a): dist = np.linalg.norm(s - goal) if dist < self.dist_tol: done = True reward_dist = 1 else: done = False reward_dist = -1 reward = reward_dist reward -= 0.1 * np.square(a).sum() return reward, dist, done def render(self, mode='human'): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer.render() def get_obs(self): qpos = self.get_qpos(self.sim) qvel = self.get_qvel(self.sim) ob = np.concatenate([qpos, qvel]) if self.with_kin: link_rela = self.get_xpos_xrot(self.sim) ob = np.concatenate([ob, link_rela]) if self.with_dyn: body_mass = self.get_body_mass(self.sim) joint_friction = self.get_friction(self.sim) joint_damping = self.get_damping(self.sim) armature = self.get_armature(self.sim) dyn_vec = np.concatenate( (body_mass, joint_friction, joint_damping, armature)) dyn_vec = np.divide((dyn_vec - self.dyn_min), self.dyn_max - self.dyn_min) ob = np.concatenate([ob, dyn_vec]) target_pos = self.sim.data.get_site_xpos('target').flatten() ob = np.concatenate([ob, target_pos]) achieved_goal = self.sim.data.get_site_xpos('ref_pt') return ob, achieved_goal def get_link_length(self, sim): link_length = [] for link in self.links: geom_id = sim.model._geom_name2id[link] link_length.append(sim.model.geom_size[geom_id][1]) return np.asarray(link_length).reshape(-1) def get_qpos(self, sim): angle_noise_range = 0.02 qpos = sim.data.qpos[:self.act_dim] qpos += np.random.uniform(-angle_noise_range, angle_noise_range, self.act_dim) qpos = np.pad(qpos, (0, 7 - self.act_dim), mode='constant', constant_values=0) return qpos.reshape(-1) def get_qvel(self, sim): velocity_noise_range = 0.02 qvel = sim.data.qvel[:self.act_dim] qvel += np.random.uniform(-velocity_noise_range, velocity_noise_range, self.act_dim) qvel = np.pad(qvel, (0, 7 - self.act_dim), mode='constant', constant_values=0) return qvel.reshape(-1) def get_xpos_xrot(self, sim): xpos = [] xrot = [] for joint_id in range(self.act_dim): joint = sim.model._actuator_id2name[joint_id] if joint == 'j0': pos1 = sim.data.get_body_xpos('base_link') mat1 = sim.data.get_body_xmat('base_link') else: prev_id = joint_id - 1 prev_joint = sim.model._actuator_id2name[prev_id] pos1 = sim.data.get_site_xpos(prev_joint) mat1 = sim.data.get_site_xmat(prev_joint) pos2 = sim.data.get_site_xpos(joint) mat2 = sim.data.get_site_xmat(joint) relative_pos = pos2 - pos1 rot_euler = self.relative_rotation(mat1, mat2) xpos.append(relative_pos) xrot.append(rot_euler) xpos = np.array(xpos).flatten() xrot = np.array(xrot).flatten() xpos = np.pad(xpos, (0, (7 - self.act_dim) * 3), mode='constant', constant_values=0) xrot = np.pad(xrot, (0, (7 - self.act_dim) * 3), mode='constant', constant_values=0) ref_pt_xpos = self.sim.data.get_site_xpos('ref_pt') ref_pt_xmat = self.sim.data.get_site_xmat('ref_pt') relative_pos = ref_pt_xpos - pos2 rot_euler = self.relative_rotation(mat2, ref_pt_xmat) xpos = np.concatenate((xpos, relative_pos.flatten())) xrot = np.concatenate((xrot, rot_euler.flatten())) pos_rot = np.concatenate((xpos, xrot)) return pos_rot def get_damping(self, sim): damping = sim.model.dof_damping[:self.act_dim] damping = np.pad(damping, (0, 7 - self.act_dim), mode='constant', constant_values=0) return damping.reshape(-1) def get_friction(self, sim): friction = sim.model.dof_frictionloss[:self.act_dim] friction = np.pad(friction, (0, 7 - self.act_dim), mode='constant', constant_values=0) return friction.reshape(-1) def get_body_mass(self, sim): body_mass = [] for body in self.bodies: body_id = sim.model._body_name2id[body] body_mass.append(sim.model.body_mass[body_id]) return np.asarray(body_mass).reshape(-1) def get_armature(self, sim): armature = sim.model.dof_armature[:self.act_dim] armature = np.pad(armature, (0, 7 - self.act_dim), mode='constant', constant_values=0) return armature.reshape(-1) def relative_rotation(self, mat1, mat2): # return the euler x,y,z of the relative rotation # (w.r.t site1 coordinate system) from site2 to site1 rela_mat = np.dot(np.linalg.inv(mat1), mat2) return rotations.mat2euler(rela_mat) def close(self): pass
#This script renders image from xml model into png # To run this script mujoco and mujoco-py must be installed # Run: python3 save_rgb.py <model_with_path> <name_of_used_camera> <output_image_with_path> import sys from matplotlib import pyplot as plt from mujoco_py import load_model_from_path, MjSim, functions import numpy as np import time model_path = sys.argv[1] camera_name = sys.argv[2] img_with_path = sys.argv[3] model = load_model_from_path(model_path) sim = MjSim(model) img = sim.render(width=500, height=500, camera_name=camera_name) #print(img) plt.imshow(img, interpolation='nearest') plt.savefig(img_with_path) #plt.show()
def test_jacobians(): xml = """ <mujoco> <worldbody> <body name="body1" pos="0 0 0"> <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/> <geom name="geom1" pos="0 0 0" size="1.0"/> <body name="body2" pos="0 0 1"> <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/> <geom name="geom2" pos="1 1 1" size="0.5"/> <site name="target" size="0.1"/> </body> </body> </worldbody> <actuator> <motor joint="a"/> <motor joint="b"/> </actuator> </mujoco> """ model = load_model_from_xml(xml) sim = MjSim(model) sim.reset() # After reset jacobians are all zeros target_jacp = np.zeros(3 * sim.model.nv) sim.data.get_site_jacp('target', jacp=target_jacp) np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv)) # After first forward, jacobians are real sim.forward() sim.data.get_site_jacp('target', jacp=target_jacp) target_test = np.array([0, 0, -1, 1, 0, 0]) np.testing.assert_allclose(target_jacp, target_test) # Should be unchanged after steps (zero action) for _ in range(2): sim.step() sim.forward() sim.data.get_site_jacp('target', jacp=target_jacp) assert np.linalg.norm(target_jacp - target_test) < 1e-3 # Apply a very large action, ensure jacobian unchanged after step sim.reset() sim.forward() sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9 sim.step() sim.data.get_site_jacp('target', jacp=target_jacp) np.testing.assert_allclose(target_jacp, target_test) # After large action, ensure jacobian changed after forward sim.forward() sim.data.get_site_jacp('target', jacp=target_jacp) assert not np.allclose(target_jacp, target_test) # Test the `site_jacp` property, which gets all at once np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0]) # Test not passing in array sim.reset() sim.forward() target_jacp = sim.data.get_site_jacp('target') np.testing.assert_allclose(target_jacp, target_test) # Test passing in bad array (long instead of double) target_jacp = np.zeros(3 * sim.model.nv, dtype=np.long) with pytest.raises(ValueError): sim.data.get_site_jacp('target', jacp=target_jacp) # Test rotation jacobian - like above but 'jacr' instead of 'jacp' # After reset jacobians are all zeros sim.reset() target_jacr = np.zeros(3 * sim.model.nv) sim.data.get_site_jacr('target', jacr=target_jacr) np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv)) # After first forward, jacobians are real sim.forward() sim.data.get_site_jacr('target', jacr=target_jacr) target_test = np.array([1, 1, 0, 0, 0, 0]) # Test allocating dedicated array target_jacr = sim.data.get_site_jacr('target') np.testing.assert_allclose(target_jacr, target_test) # Test the batch getter (all sites at once) np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0]) # Test passing in bad array target_jacr = np.zeros(3 * sim.model.nv, dtype=np.long) with pytest.raises(ValueError): sim.data.get_site_jacr('target', jacr=target_jacr)
def get_sim(seed): model = load_model_from_path_fix_paths(xml_path=pattern) return MjSim(model)
def __init__(self): #super(lab_env, self).__init__(env) # The real-world simulator self.model = load_model_from_path(xml_path) self.sim = MjSim(self.model) self.viewer = MjViewer(self.sim)
with open('real.csv') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for row in csv_reader: calculated_angle_list.append(float(row[0]) - 90) read_index = np.arange(0, 700) import argparse parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--render', help='render', type=bool, default=False) args = parser.parse_args() model = load_model_from_path("test_servo.xml") sim = MjSim(model) joints = ['servo_1'] joints_idx = list(map(lambda x: sim.model.joint_name2id(x), joints)) qpos = sim.data.qpos joints_pos = qpos[joints_idx] # viewer = MjViewer(sim) sim_state = sim.get_state() target_position_list = [] sim_position_list = [] i = 0
def test_arrays_of_objs(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() renderer = cymj.MjRenderContext(sim, offscreen=True) assert len(renderer.scn.camera) == 2, "Expecting scn.camera to be available"
</body> </worldbody> <actuator> <position joint="j" kp="2000"/> </actuator> </mujoco> """ fn = ''' #define SQUARE(a) (a * a) void fun(const mjModel* m, mjData* d) { for (int i = d->ne; i < d->nefc; i++) { pos_sum += SQUARE(d->efc_pos[i]); } } ''' sim = MjSim(load_model_from_xml(MODEL_XML), nsubsteps=50, substep_callback=fn, userdata_names=['pos_sum']) t = 0 while t < 10: t += 1 sim.data.ctrl[0] = .2 print('t', t) sim.step() # verify that there are no contacts visible between steps assert sim.data.ncon == 0, 'No contacts should be detected here' # verify that contacts (and penetrations) are visible to substep_callback if t > 1: assert sim.data.get_userdata('pos_sum') > 0 # detected collision
class PointEnvMDP(MDP): def __init__(self, init_mean=(-0.2, -0.2), control_cost=False, dense_reward=False, render=False): xml = os.path.join( os.path.expanduser("~"), "git-repos/dm_control/dm_control/suite/point_mass.xml") model = load_model_from_path(xml) self.sim = MjSim(model) self.render = render self.init_mean = init_mean self.control_cost = control_cost self.dense_reward = dense_reward if self.render: self.viewer = MjViewer(self.sim) # Config self.env_name = "Point-Mass-Environment" self.target_position = np.array([0., 0.]) self.target_tolerance = 0.02 self.init_noise = 0.05 self._initialize_mujoco_state() self.init_state = self.get_state() print("Loaded {} with dense_reward={}".format(self.env_name, self.dense_reward)) MDP.__init__(self, [0, 1], self._transition_func, self._reward_func, self.init_state) def _reward_func(self, state, action): self.next_state = self._step(action) if self.render: self.viewer.render() if self.dense_reward: reward = -np.linalg.norm(self.next_state.position - self.target_position) else: reward = +0. if self.next_state.is_terminal() else -1. control_cost = 0.1 * np.linalg.norm(action) if self.control_cost: reward = reward - control_cost return reward def _transition_func(self, state, action): return self.next_state def execute_agent_action(self, action, option_idx=None): if self.render and option_idx is not None: self.viewer.add_marker(pos=np.array([0, 0, 0.1]), size=0.001 * np.ones(3), label="Option {}".format(option_idx)) reward, next_state = super(PointEnvMDP, self).execute_agent_action(action) return reward, next_state def is_goal_state(self, state): position = state.features()[:2] if isinstance( state, PointEnvState) else state[:2] return self.is_goal_position(position) def is_goal_position(self, position): distance = np.linalg.norm(position - self.target_position) return distance <= self.target_tolerance def get_state(self): # Individually indexing to prevent Mujoco from altering state history x, y = self.sim.data.qpos[0], self.sim.data.qpos[1] x_dot, y_dot = self.sim.data.qvel[0], self.sim.data.qvel[1] # Create State object from simulator state position = np.array([x, y]) velocity = np.array([x_dot, y_dot]) # State is terminal when it is the goal state done = self.is_goal_position(position) state = PointEnvState(position, velocity, done) return state def _step(self, action): self.sim.data.ctrl[:] = action self.sim.step() return self.get_state() def _initialize_mujoco_state(self): init_position = np.array(self.init_mean) + np.random.uniform( 0., self.init_noise, 2) init_state = MjSimState(time=0., qpos=init_position, qvel=np.array([0., 0.]), act=None, udd_state={}) self.sim.set_state(init_state) @staticmethod def state_space_size(): return 4 @staticmethod def action_space_size(): return 2 @staticmethod def is_primitive_action(action): return -1. <= action.all() <= 1. def reset(self): self._initialize_mujoco_state() self.init_state = self.get_state() super(PointEnvMDP, self).reset() def __str__(self): return self.env_name
class SimManager(): """""" def __init__(self, filepath, blender_path=None, visualize=False, num_sims=1): self.filepath = filepath self.blender_path = blender_path self.visualize = visualize self.num_sims = num_sims self.base_model = load_model_from_path(filepath) if self.num_sims > 1: self.pool = MjRenderPool(self.base_model, n_workers=num_sims, modder=ArenaModder) else: self.sim = MjSim(self.base_model) self.arena_modder = ArenaModder(self.sim) if self.visualize: self.arena_modder.visualize = True self.viewer = MjViewer(self.sim) self.arena_modder.viewer = self.viewer else: self.viewer = None def forward(self): """ Advances simulator a step (NECESSARY TO MAKE CAMERA AND LIGHT MODDING WORK) """ if self.num_sims <= 1: self.sim.forward() if self.visualize: # Get angle of camera and display it quat = np.quaternion(*self.base_model.cam_quat[0]) ypr = quaternion.as_euler_angles(quat) * 180 / np.pi cam_pos = self.base_model.cam_pos[0] #self.viewer.add_marker(pos=cam_pos, label="CAM: {}{}".format(cam_pos, ypr)) self.viewer.add_marker(pos=cam_pos, label="CAM: {}".format(ypr)) self.viewer.render() def _get_pool_data(self): IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001) cam_imgs, ground_truths = self.pool.render(640, 360, camera_name='camera1', randomize=True) ground_truths = list(ground_truths) cam_imgs = list( cam_imgs[:, ::-1, :, :]) # Rendered images are upside-down. for i in range(len(cam_imgs)): image_noise_variance = sample(IMAGE_NOISE_RVARIANCE) cam_imgs[i] = (skimage.util.random_noise( cam_imgs[i], mode='gaussian', var=image_noise_variance) * 255).astype(np.uint8) cam_imgs[i] = preproc_image(cam_imgs[i]) return cam_imgs, ground_truths def _get_cam_frame(self, display=False, ground_truth=None): """Grab an image from the camera (224, 244, 3) to feed into CNN""" IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001) cam_img = self.sim.render( 640, 360, camera_name='camera1' )[::-1, :, :] # Rendered images are upside-down. image_noise_variance = sample(IMAGE_NOISE_RVARIANCE) cam_img = (skimage.util.random_noise( cam_img, mode='gaussian', var=image_noise_variance) * 255).astype( np.uint8) cam_img = preproc_image(cam_img) if display: label = str(ground_truth[3:6]) display_image(cam_img, label) return cam_img def _get_ground_truth(self): return self.arena_modder.get_ground_truth() def get_data(self): if self.num_sims > 1: return self._get_pool_data() else: self.arena_modder.randomize() gt = self._get_ground_truth() self.sim.forward() cam = self._get_cam_frame() return cam, gt def randrocks(self): """Generate a new set of 3 random rock meshes using a Blender script""" if self.blender_path is None: raise Exception( 'You must install Blender and include the path to its exectubale in the constructor to use this method' ) import subprocess subprocess.call( [self.blender_path, "--background", "--python", "randrock.py"])
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() try: observation, _reward, done, _info = self.step( np.zeros(self.model.nu)) except NotImplementedError: observation, _reward, done, _info = self._step( np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([ o.size for o in observation ]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.seed() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass def evaluate_success(self, paths, logger=None): """ Log various success metrics calculated based on input paths into the logger """ pass # ----------------------------- def reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) old_state = self.sim.get_state() new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel, old_state.act, old_state.udd_state) self.sim.set_state(new_state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def render(self, *args, **kwargs): pass #return self.mj_render() def _get_viewer(self): pass #return None def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640, 480), mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1, :, :]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite(file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f" % (t1 - t0))
<joint axis="0 1 0" name="box:y" type="slide"/> </body> <body name="floor" pos="0 0 0.025"> <geom size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/> </body> </worldbody> </mujoco> """ def print_box_xpos(sim): print("box xpos:", sim.data.get_body_xpos("box")) model = load_model_from_xml(MODEL_XML) sim = MjSim(model) viewer = MjViewer(sim) states = [{'box:x': +0.8, 'box:y': +0.8}, {'box:x': -0.8, 'box:y': +0.8}, {'box:x': -0.8, 'box:y': -0.8}, {'box:x': +0.8, 'box:y': -0.8}, {'box:x': +0.0, 'box:y': +0.0}] # MjModel.joint_name2id returns the index of a joint in # MjData.qpos. x_joint_i = sim.model.get_joint_qpos_addr("box:x") y_joint_i = sim.model.get_joint_qpos_addr("box:y") print_box_xpos(sim)
def test_mj_sim_buffers(): model = load_model_from_xml(BASIC_MODEL_XML) # test no callback sim = MjSim(model, nsubsteps=2) assert(sim.udd_state == {}) sim.step() assert(sim.udd_state == {}) # test with callback foo = 10 d = {"foo": foo, "foo_2": np.array([foo, foo])} def udd_callback(sim): return d sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback) assert(sim.udd_state is not None) assert(sim.udd_state["foo"] == foo) assert(sim.udd_state["foo_2"].shape[0] == 2) assert(sim.udd_state["foo_2"][0] == foo) foo = 11 d = {"foo": foo, "foo_2": np.array([foo, foo])} sim.step() assert(sim.udd_state is not None) assert(sim.udd_state["foo"] == foo) assert(sim.udd_state["foo_2"][0] == foo) d = {} with pytest.raises(AssertionError): sim.step() d = {"foo": foo, "foo_2": np.array([foo, foo]), "foo_3": foo} with pytest.raises(AssertionError): sim.step() d = {"foo": foo, "foo_2": np.array([foo, foo, foo])} with pytest.raises(AssertionError): sim.step() d = {"foo": "haha", "foo_2": np.array([foo, foo, foo])} with pytest.raises(AssertionError): sim.step()
def general_usage(xml_path, ee_list, N): # ========================================================================== # Quantaties # ========================================================================== ee_SE3_mujoco, ee_jac_mujoco, M_mujoco, qfrc_bias_mujoco = dict(), dict( ), [], [] ee_SE3_tf_rbdl, ee_jac_tf_rbdl, M_tf_rbdl, qfrc_bias_tf_rbdl = dict( ), dict(), None, None for ee in ee_list: ee_SE3_mujoco[ee], ee_jac_mujoco[ee], ee_SE3_tf_rbdl[ ee], ee_jac_tf_rbdl[ee] = [], [], None, None # ========================================================================== # Mujoco # ========================================================================== mujoco_model = load_model_from_path(xml_path) m = MjSim(mujoco_model) m.forward() sim_state = m.get_state() q = np.random.uniform(-1., 1., (N, m.model.nq)) qdot = np.random.uniform(-1., 1., (N, m.model.nq)) for i in tqdm(range(N)): for j in range(m.model.nq): sim_state.qpos[j] = q[i, j] sim_state.qvel[j] = qdot[i, j] m.set_state(sim_state) m.forward() for ee in ee_list: T_ee = np.eye(4) T_ee[0:3, 0:3] = m.data.get_body_xmat(ee) T_ee[0:3, 3] = m.data.get_body_xpos(ee) ee_SE3_mujoco[ee].append(T_ee) J_ee = np.zeros((6, m.model.nq)) jacp = np.zeros(m.model.nv * 3) jacr = np.zeros(m.model.nv * 3) functions.mj_jac(m.model, m.data, jacp, jacr, m.data.get_body_xpos(ee), m.model.body_name2id(ee)) J_ee[0:3, :] = jacr.reshape(3, m.model.nv) J_ee[3:6, :] = jacp.reshape(3, m.model.nv) ee_jac_mujoco[ee].append(J_ee) M_ = np.zeros(m.model.nv * m.model.nv) functions.mj_fullM(m.model, M_, m.data.qM) M_mujoco.append(M_.reshape(m.model.nq, m.model.nq)) qfrc_bias_mujoco.append(np.copy(m.data.qfrc_bias)) # ========================================================================== # tf_rbdl # ========================================================================== ic = initial_config_from_mjcf(xml_path, ee_list, verbose=True) for ee in ee_list: ee_SE3_tf_rbdl[ee] = fk(ic['init_ee_SE3'][ee], ic['Blist'][ee], ic['Bidlist'][ee], tf.convert_to_tensor(q, tf.float32)) ee_local_jac_tf_rbdl = jacobian(ic['Blist'][ee], ic['Bidlist'][ee], tf.convert_to_tensor(q, tf.float32)) R = ee_SE3_tf_rbdl[ee][:, 0:3, 0:3] RR = tf.concat([ tf.concat([R, tf.zeros((N, 3, 3))], 2), tf.concat([tf.zeros((N, 3, 3)), R], 2) ], 1) ee_jac_tf_rbdl[ee] = tf.matmul(RR, ee_local_jac_tf_rbdl) M_tf_rbdl = mass_matrix( tf.convert_to_tensor(q, tf.float32), ic['pidlist'], ic['Mlist'], ic['Glist'], ic['Slist']) + tf.eye(m.model.nq) * ic['joint_armature'] cori = coriolis_forces(tf.convert_to_tensor(q, tf.float32), tf.convert_to_tensor(qdot, tf.float32), ic['pidlist'], ic['Mlist'], ic['Glist'], ic['Slist']) grav = gravity_forces(tf.convert_to_tensor(q, tf.float32), ic['g'], ic['pidlist'], ic['Mlist'], ic['Glist'], ic['Slist']) qfrc_bias_tf_rbdl = cori + grav # ========================================================================== # Comparing # ========================================================================== # print("="*80) # print("q") # print(q) # print("qdot") # print(qdot) # print("="*80) # for i in range(N): # for ee in ee_list: # print("-"*80) # print("{} SE3".format(ee)) # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(), np.isclose(ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(),atol=1e-5))) # print("-"*80) # print("{} Jacobian".format(ee)) # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(), np.isclose(ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(),atol=1e-5))) # print("-"*80) # print("Mass Matrix") # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(M_mujoco[i], M_tf_rbdl[i].numpy(), np.isclose(M_mujoco[i], M_tf_rbdl[i].numpy(),atol=1e-5))) # print("-"*80) # print("Bias frc") # print("Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(), np.isclose(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(),atol=1e-5))) # print("="*80) for i in range(N): for ee in ee_list: assert np.allclose( ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(), atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format( ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy(), np.isclose(ee_SE3_mujoco[ee][i], ee_SE3_tf_rbdl[ee][i].numpy())) # print("[{} SE3] PASSED]".format(ee)) assert np.allclose( ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(), atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format( ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy(), np.isclose(ee_jac_mujoco[ee][i], ee_jac_tf_rbdl[ee][i].numpy())) # print("[{} Jacobian] PASSED]".format(ee)) assert np.allclose( M_mujoco[i], M_tf_rbdl[i].numpy(), atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format( M_mujoco[i], M_tf_rbdl[i].numpy(), np.isclose(M_mujoco[i], M_tf_rbdl[i].numpy())) # print("[Mass Matrix] PASSED]") assert np.allclose( qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(), atol=1e-04), "Mujoco\n{}\ntf_rbdl\n{}\nComparison\n{}".format( qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy(), np.isclose(qfrc_bias_mujoco[i], qfrc_bias_tf_rbdl[i].numpy())) # print("[Coriolis + Gravity] PASSED]") print("[PASSED]")