def test_rendering(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img, depth = sim.render(200, 200, depth=True) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png') depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth)) depth = np.asarray(depth * 255, dtype=np.uint8) assert depth.shape == (200, 200) compare_imgs(depth, 'test_rendering.freecam.depth.png') img = sim.render(100, 100, camera_name="camera1") assert img.shape == (100, 100, 3) compare_imgs(img, 'test_rendering.camera1.png') img = sim.render(200, 100, camera_name="camera1") assert img.shape == (100, 200, 3) compare_imgs(img, 'test_rendering.camera1.narrow.png') render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="mark") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering_markers.camera1.png')
def test_rendering(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img, depth = sim.render(200, 200, depth=True) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png') depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth)) depth = np.asarray(depth * 255, dtype=np.uint8) assert depth.shape == (200, 200) # Unfortunately mujoco 2.0 renders slightly different depth image on mac and on linux here if "darwin" in sys.platform.lower(): compare_imgs(depth, 'test_rendering.freecam.depth-darwin.png') else: compare_imgs(depth, 'test_rendering.freecam.depth.png') img = sim.render(100, 100, camera_name="camera1") assert img.shape == (100, 100, 3) compare_imgs(img, 'test_rendering.camera1.png') img = sim.render(200, 100, camera_name="camera1") assert img.shape == (100, 200, 3) compare_imgs(img, 'test_rendering.camera1.narrow.png') render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="mark") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering_markers.camera1.png')
def test_concurrent_rendering(): '''Best-effort testing that concurrent multi-threaded rendering works. The test has no guarantees around being deterministic, but if it fails you know something is wrong with concurrent rendering. If it passes, things are probably working.''' err = None def func(sim, event): event.wait() sim.data.qpos[:] = 0.0 sim.forward() img1 = sim.render(width=40, height=40, camera_name="camera1") img2 = sim.render(width=40, height=40, camera_name="camera2") try: assert np.sum(img1[:]) == 23255 assert np.sum(img2[:]) == 12007 except Exception as e: nonlocal err err = e model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.render(100, 100) event = Event() threads = [] for _ in range(100): thread = Thread(target=func, args=(sim, event)) threads.append(thread) thread.start() event.set() for thread in threads: thread.join() assert err is None, "Exception: %s" % (str(err))
def test_multiple_sims(): # Ensure that creating new simulators still produces good renderings. xml = """ <mujoco> <asset> <texture name="t1" width="32" height="32" type="2d" builtin="flat" /> <material name="m1" texture="t1" /> </asset> <worldbody> <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" /> <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" /> <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" /> </worldbody> </mujoco> """ model = load_model_from_xml(xml) random_state = np.random.RandomState(0) for i in range(3): sim = MjSim(model) sim.forward() modder = TextureModder(sim, random_state=random_state) for j in range(2): modder.rand_checker('g1') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_multiple_sims.loop%d_%d.png' % (i, j))
def test_multiple_sims(): # Ensure that creating new simulators still produces good renderings. xml = """ <mujoco> <asset> <texture name="t1" width="32" height="32" type="2d" builtin="flat" /> <material name="m1" texture="t1" /> </asset> <worldbody> <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" /> <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" /> <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" /> </worldbody> </mujoco> """ model = load_model_from_xml(xml) random_state = np.random.RandomState(0) for i in range(3): sim = MjSim(model) sim.forward() modder = TextureModder(sim, random_state=random_state) for j in range(2): modder.rand_checker('g1') compare_imgs( sim.render(201, 205, camera_name="topcam"), 'test_multiple_sims.loop%d_%d.png' % (i, j))
def _render_depth(sim: mujoco_py.MjSim, camera: str, render_height: int, render_width: int, world_xml: ET.Element): # take screenshot frame = sim.render(render_width * 2, render_height, camera_name=camera, depth=True) return frame[1] # depth buffer
def test_high_res(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img = sim.render(1000, 1000) img = scipy.misc.imresize(img, (200, 200, 3)) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png')
def test_high_res(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() img = sim.render(1000, 1000) img = np.array(Image.fromarray(img).resize(size=(200, 200))) assert img.shape == (200, 200, 3) compare_imgs(img, 'test_rendering.freecam.png')
def test_rendering_failing(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() sim.render(100, 100) render_context = sim.render_contexts[0] render_context.add_marker(size=np.array([.4, .5, .6]), pos=np.array([.4, .5, .6]), rgba=np.array([.7, .8, .9, 1.0]), label="blaaaa") img = sim.render(200, 200, camera_name="camera1") assert img.shape == (200, 200, 3) try: compare_imgs(img, 'test_rendering_markers.camera1.png') assert False except Exception as e: pass
def setup_sim(device_id): model = load_model_from_path("xmls/fetch/main.xml") sim = MjSim(model) image = sim.render(IMAGE_WIDTH, IMAGE_HEIGHT, device_id=device_id) assert image.shape == (IMAGE_HEIGHT, IMAGE_WIDTH, 3) return sim
def test_materials(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.premod.png') random_state = np.random.RandomState(0) modder = MaterialModder(sim, random_state=random_state) modder.set_specularity('g1', 1.0) modder.set_reflectance('g2', 1.0) modder.set_shininess('g3', 1.0) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.props.png') modder.rand_all('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_materials.rand_all.png')
def _render_seg(sim: mujoco_py.MjSim, camera: str, render_height: int, render_width: int, world_xml: ET.Element): lm = LightModder(sim) # switch all lights off light_names = [l.attrib['name'] for l in world_xml.findall(".//light")] for light_name in light_names: lm.set_active(light_name, 0) # take screenshot frame = sim.render(render_width * 2, render_height, camera_name=camera) # reset lights for light_name in light_names: lm.set_active(light_name, 1) return frame
def test_glfw_context(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() render_context = MjRenderContext(sim, offscreen=True, opengl_backend='glfw') assert len(sim.render_contexts) == 1 assert sim.render_contexts[0] is render_context assert isinstance(render_context.opengl_context, GlfwContext) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_glfw_context.png') assert len(sim.render_contexts) == 1 assert sim.render_contexts[0] is render_context
def _render_rgb(sim: mujoco_py.MjSim, camera: str, render_height: int, render_width: int, world_xml: ET.Element): lm = LightModder(sim) cam_names = [c.attrib['name'] for c in world_xml.findall(".//camera")] # lights off for all other cameras except the recording one for cam in cam_names: light_name = _get_cam_light_name(cam) if light_name in sim.model.light_names: lm.set_active(light_name, 1 if cam == camera else 0) # take screenshot frame = sim.render(render_width * 2, render_height, camera_name=camera) # reset camera lights for cam in cam_names: light_name = _get_cam_light_name(cam) if light_name in sim.model.light_names: lm.set_active(light_name, 1) return frame
class SimWorker(object): def __init__(self): print('created worker') def create_sim(self): self.sim = MjSim( load_model_from_path( '/mount/visual_mpc/mjc_models/cartgripper_noautogen.xml')) # self.sim = MjSim(load_model_from_path('/mnt/sda1/visual_mpc/mjc_models/cartgripper_noautogen.xml')) def run_sim(self): print('startsim') images = [] for i in range(5): self.sim.step() images.append(self.sim.render(100, 100)) return images
def test_textures(): model = load_model_from_xml(BASIC_MODEL_XML) sim = MjSim(model) sim.forward() compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.premod.png') random_state = np.random.RandomState(0) modder = TextureModder(sim, random_state=random_state) modder.whiten_materials() modder.whiten_materials(['g1', 'g2']) modder.set_rgb('g1', (255, 0, 0)) modder.set_rgb('g2', (0, 255, 0)) modder.set_rgb('g3', (0, 0, 255)) modder.set_rgb('g4', (255, 0, 255)) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rgb.png') modder.set_checker('g1', (255, 0, 0), (0, 255, 0)) modder.set_gradient('g2', (0, 255, 0), (0, 0, 255), vertical=True) modder.set_gradient('g3', (255, 255, 0), (0, 0, 255), vertical=False) modder.set_noise('g4', (0, 0, 255), (255, 0, 0), 0.1) compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.variety.png') modder.rand_checker('g1') modder.rand_gradient('g2') modder.rand_noise('g3') modder.rand_rgb('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_specific.png') modder.rand_all('g1') modder.rand_all('g2') modder.rand_all('g3') modder.rand_all('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_all.png') modder.rand_checker('g1') modder.rand_checker('g2') modder.rand_checker('g3') modder.rand_checker('g4') mat_modder = MaterialModder(sim, random_state=random_state) mat_modder.rand_texrepeat('g1') mat_modder.rand_texrepeat('g2') mat_modder.rand_texrepeat('g3') mat_modder.rand_texrepeat('g4') compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_textures.rand_texrepeat.png')
class Mujoco_Renderer(): def __init__(self, mujoco_xml, hp): from mujoco_py import load_model_from_path, MjSim mujoco_xml = '/'.join(str.split(gcp.__file__, '/')[:-1]) + '/' + mujoco_xml self.sim = MjSim(load_model_from_path(mujoco_xml)) self._hp = hp def render(self, qpos): sim_state = self.sim.get_state() sim_state.qpos[:2] = qpos sim_state.qvel[:] = np.zeros_like(self.sim.data.qvel) self.sim.set_state(sim_state) self.sim.forward() subgoal_image = self.sim.render(self._hp.mpar.img_sz, self._hp.mpar.img_sz, camera_name='maincam') # plt.imshow(subgoal_image) # plt.savefig('test.png') return subgoal_image
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, env_name, rand_seed, maximum_length, model_path, frame_skip, misc_info={}): self._env_name = env_name self._rand_seed = rand_seed self._maximum_length = maximum_length self._misc_info = misc_info if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() # Why is this here? Causes errors #observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) #assert not done #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) # annoying should replace high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed(self._rand_seed) def _seed(self, seed=None): self.np_random, seed = seeding.np_random(self._rand_seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def get_env_state(self): """ Get full state of the environment beyond qpos and qvel For example, if targets are defined using sites, this function should also contain location of the sites (which are not included in qpos). Must return a dictionary that can be used in the set_env_state function """ raise NotImplementedError def set_env_state(self, state): """ Uses the state dictionary to set the state of the world """ raise NotImplementedError # ----------------------------- def reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob, 0, False, {} def reset_soft(self, seed=None): return self._old_obs, 0, False, {} def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640, 480), mode='exploration', save_loc='./tmp/', filename='newvid', it=0, camera_name=None): for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o, *_ = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy(o) o, r, d, _ = self.step(a) t = t + 1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1, :, :]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + str(it) + ".mp4" if not os.path.exists(save_loc): os.makedirs(save_loc) imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0) print("saved", file_name) t1 = timer.time() print("time taken = %f" % (t1 - t0))
class lab_env(): def __init__(self, env, args): #super(lab_env, self).__init__(env) # The real-world simulator self.model = load_model_from_path('xmls/lab_env.xml') self.sim = MjSim(self.model) # Used to locate the gripper self.model2 = load_model_from_path('xmls/light_env.xml') self.sim2 = MjSim(self.model2) def reset(self, task_id): self.task = task_id self.grasping = -1 self.last_grasp = -1 # Configure gravity for i in range(4): self.sim.data.ctrl[i] = -1 # Configure joint positions for i in range(42): self.sim.data.qpos[i] = initial_pos[i] for i in range(3): self.sim.data.qpos[i] = joint_pos[task_id][i] self.pos_forward() self.sim.forward() remapped_pos = [ remap(self.sim.data.qpos[0], -30 / 180 * math.pi, 45 / 180 * math.pi, -1, 1), remap(self.sim.data.qpos[1], -105 / 180 * math.pi, -50 / 180 * math.pi, -1, 1), remap(self.sim.data.qpos[2], 0 / 180 * math.pi, 180 / 180 * math.pi, -1, 1), 0 ] return (remapped_pos, ) + self.get_state() def step(self, action): self.sim.data.qpos[0] = remap(action[0], -1, 1, -30 / 180 * math.pi, 45 / 180 * math.pi) self.sim.data.qpos[1] = remap(action[1], -1, 1, -105 / 180 * math.pi, -50 / 180 * math.pi) self.sim.data.qpos[2] = remap(action[2], -1, 1, 0 / 180 * math.pi, 180 / 180 * math.pi) self.pos_forward() self.sim.forward() if action[3] < self.last_grasp or self.grasping == -1: t = int(remap(action[3], -1, 1, 0, grasp_steps)) for i in range(6, 14): self.sim.data.qpos[i] = 0 self.sim.forward() self.grasping = -1 self.sim.data.ctrl[4] = 1 self.sim.data.ctrl[5] = 1 backup = [ self.sim.data.qpos[i] for i in [15, 16, 22, 23, 29, 30, 36, 37] ] for i in range(t): self.sim.step() stop = False for j in range(4): if self.sim.data.sensordata[j] > sensor_threshold: self.grasping = j self.pickuppos = [ self.sim.data.qpos[i] for i in (list(range(6)) + list( range(14 + 7 * self.grasping, 21 + 7 * self.grasping))) ] stop = True break for i in range(4): for j in range(2): self.sim.data.qpos[15 + 7 * i + j] = backup[i * 2 + j] if stop: break self.gripper_sync() self.sim.forward() self.sim.data.ctrl[4] = 0 self.sim.data.ctrl[5] = 0 self.last_grasp = action[3] return self.get_state() # Ensure that the gripper is always heading down and is parallar to the desk edge def pos_forward(self): self.sim.data.qpos[ 3] = math.pi * 1.5 - self.sim.data.qpos[1] - self.sim.data.qpos[2] self.sim.data.qpos[4] = math.pi * 1.5 self.sim.data.qpos[5] = math.pi * 1.25 + self.sim.data.qpos[0] self.gripper_sync() if self.grasping != -1: current_xyz = pos_to_xyz(self.sim.data.qpos[0:6]) old_xyz = pos_to_xyz(self.pickuppos[0:6]) for i in range(3): self.sim.data.qpos[ 14 + 7 * self.grasping + i] = self.pickuppos[6 + i] + current_xyz[i] - old_xyz[i] ''' old_quat = quaternion(self.pickuppos[9], vector(self.pickuppos[10], self.pickuppos[11], self.pickuppos[12])) rotate_quat = quaternion(math.cos(self.sim.data.qpos[0] - self.pickuppos[0]), vector(0, 0, math.sin(self.sim.data.qpos[0] - self.pickuppos[0]))) new_quat = rotate_quat.mul(old_quat) self.sim.data.qpos[17 + 7 * self.grasping] = new_quat.w self.sim.data.qpos[18 + 7 * self.grasping] = new_quat.v.x self.sim.data.qpos[19 + 7 * self.grasping] = new_quat.v.y self.sim.data.qpos[20 + 7 * self.grasping] = new_quat.v.z ''' def gripper_sync(self): self.sim.data.qpos[9] = gripper_consistent(self.sim.data.qpos[6:9]) self.sim.data.qpos[13] = gripper_consistent(self.sim.data.qpos[10:13]) def get_state(self): sync(self.sim, self.sim2, 6) # Locate the gripper, render twice to overcome bugs in mujoco image_1 = copy.deepcopy( self.sim.render(width=960, height=720, camera_name='workbench_camera')) image_1 = copy.deepcopy( self.sim.render(width=960, height=720, camera_name='workbench_camera')) image_2 = copy.deepcopy( self.sim.render(width=960, height=720, camera_name='upper_camera')) image_2 = copy.deepcopy( self.sim.render(width=960, height=720, camera_name='upper_camera')) image_3 = copy.deepcopy( self.sim2.render(width=960, height=720, camera_name='workbench_camera')) image_3 = copy.deepcopy( self.sim2.render(width=960, height=720, camera_name='workbench_camera')) x1, y1 = get_x_y(image_3) image_4 = copy.deepcopy( self.sim2.render(width=960, height=720, camera_name='upper_camera')) image_4 = copy.deepcopy( self.sim2.render(width=960, height=720, camera_name='upper_camera')) x2, y2 = get_x_y(image_4) # Crop gripper images and add noise image_1 = cv2.GaussianBlur( gaussian_noise( crop(image_1, x1 + fig_size_1[0] // 2, y1, *fig_size_1), *gaussian_noise_parameters), *gaussian_blur_prarmeters).transpose((2, 0, 1)) image_2 = cv2.GaussianBlur( gaussian_noise( crop(image_2, x2 + fig_size_2[0] // 2, y2, *fig_size_2), *gaussian_noise_parameters), *gaussian_blur_prarmeters).transpose((2, 0, 1)) danger = int(self.safety_check() or math.isnan(x1) or math.isnan(y1) or math.isnan(x2) or math.isnan(y2)) return [x1, y1, x2, y2, int(self.grasping == self.task), danger], (image_1, image_2) def safety_check(self): # return 0 if safe, otherwise 1 backup = [self.sim.data.qpos[i] for i in range(14)] self.sim.step() s = 0 for i in range(6): s += abs(backup[i] - self.sim.data.qpos[i]) self.sim.data.qpos[i] = backup[i] return s > safety_threshold def force_close(self): for i in range(2): for j in range(3): self.sim.data.qpos[6 + i * 4 + j] = closed_pos[j] self.gripper_sync() self.sim.forward()
class BaseMujocoEnv(BaseEnv): def __init__(self, model_path, _hp): self._frame_height = _hp.viewer_image_height self._frame_width = _hp.viewer_image_width self._reset_sim(model_path) self._base_adim, self._base_sdim = None, None #state/action dimension of Mujoco control self._adim, self._sdim = None, None #state/action dimension presented to agent self.num_objects, self._n_joints = None, None self._goal_obj_pose = None self._goaldistances = [] self._ncam = _hp.ncam if self._ncam == 2: self.cameras = ['maincam', 'leftcam'] elif self._ncam == 1: self.cameras = ['maincam'] else: raise ValueError self._last_obs = None self._hp = _hp def _default_hparams(self): parent_params = super()._default_hparams() parent_params.add_hparam('viewer_image_height', 480) parent_params.add_hparam('viewer_image_width', 640) parent_params.add_hparam('ncam', 1) return parent_params def set_goal_obj_pose(self, pose): self._goal_obj_pose = pose def _reset_sim(self, model_path): """ Creates a MjSim from passed in model_path :param model_path: Absolute path to model file :return: None """ self._model_path = model_path self.sim = MjSim(load_model_from_path(self._model_path)) def reset(self): self._goaldistances = [] def render(self): """ Renders the enviornment. Implements custom rendering support. If mode is: - dual: renders both left and main cameras - left: renders only left camera - main: renders only main (front) camera :param mode: Mode to render with (dual by default) :return: uint8 numpy array with rendering from sim """ images = np.zeros( (self._ncam, self._frame_height, self._frame_width, 3), dtype=np.uint8) for i, cam in enumerate(self.cameras): images[i] = self.sim.render(self._frame_width, self._frame_height, camera_name=cam) return images def project_point(self, point, camera): model_matrix = np.zeros((4, 4)) model_matrix[:3, :3] = self.sim.data.get_camera_xmat(camera).T model_matrix[-1, -1] = 1 fovy_radians = np.deg2rad( self.sim.model.cam_fovy[self.sim.model.camera_name2id(camera)]) uh = 1. / np.tan(fovy_radians / 2) uw = uh / (self._frame_width / self._frame_height) extent = self.sim.model.stat.extent far, near = self.sim.model.vis.map.zfar * extent, self.sim.model.vis.map.znear * extent view_matrix = np.array([ [uw, 0., 0., 0.], # matrix definition from [ 0., uh, 0., 0. ], # https://stackoverflow.com/questions/18404890/how-to-build-perspective-projection-matrix-no-api [0., 0., far / (far - near), -1.], [0., 0., -2 * far * near / (far - near), 0.] ]) # Note Mujoco doubles this quantity MVP_matrix = view_matrix.dot(model_matrix) world_coord = np.ones((4, 1)) world_coord[:3, 0] = point - self.sim.data.get_camera_xpos(camera) clip = MVP_matrix.dot(world_coord) ndc = clip[:3] / clip[3] # everything should now be in -1 to 1!! col, row = (ndc[0] + 1) * self._frame_width / 2, ( -ndc[1] + 1) * self._frame_height / 2 return self._frame_height - row, col # rendering flipped around in height def get_desig_pix(self, target_width, round=True, obj_poses=None): qpos_dim = self._n_joints # the states contains pos and vel assert self.sim.data.qpos.shape[0] == qpos_dim + 7 * self.num_objects desig_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int) ratio = self._frame_width / target_width for icam, cam in enumerate(self.cameras): for i in range(self.num_objects): if obj_poses is None: fullpose = self.sim.data.qpos[i * 7 + qpos_dim:(i + 1) * 7 + qpos_dim].squeeze() chosen_point = fullpose[:3] else: chosen_point = obj_poses[i, :3] d = self.project_point(chosen_point, cam) d = np.stack(d) / ratio if round: d = np.around(d).astype(np.int) desig_pix[icam, i] = d.squeeze() return desig_pix def get_goal_pix(self, target_width, round=True): goal_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=np.int) ratio = self._frame_width / target_width for icam, cam in enumerate(self.cameras): for i in range(self.num_objects): g = self.project_point(self._goal_obj_pose[i, :3], cam) g = np.stack(g) / ratio if round: g = np.around(g).astype(np.int) goal_pix[icam, i] = g.squeeze() return goal_pix def eval(self, target_width=None, save_dir=None, ntasks=None): self._goaldistances.append(self.get_distance_score()) stats = {} stats['improvement'] = self._goaldistances[0] - self._goaldistances[-1] stats['initial_dist'] = self._goaldistances[0] stats['final_dist'] = self._goaldistances[-1] return stats def get_distance_score(self): """ :return: mean of the distances between all objects and goals """ abs_distances = [] for i_ob in range(self.num_objects): goal_pos = self._goal_obj_pose[i_ob, :3] curr_pos = self.sim.data.qpos[self._n_joints + i_ob * 7:self._n_joints + 3 + i_ob * 7] abs_distances.append(np.linalg.norm(goal_pos - curr_pos)) return np.mean(np.array(abs_distances)) def snapshot_noarm(self): raise NotImplementedError @property def adim(self): return self._adim @property def sdim(self): return self._sdim @property def ncam(self): return self._ncam
class MazeNavigation(Env, utils.EzPickle): def __init__(self): utils.EzPickle.__init__(self) self.hist = self.cost = self.done = self.time = self.state = None dirname = os.path.dirname(__file__) filename = os.path.join(dirname, 'assets/simple_maze.xml') self.sim = MjSim(load_model_from_path(filename)) self.horizon = HORIZON self._max_episode_steps = self.horizon self.transition_function = get_offline_data self.steps = 0 self.images = not GT_STATE self.action_space = Box(-MAX_FORCE * np.ones(2), MAX_FORCE * np.ones(2)) self.transition_function = get_offline_data obs = self._get_obs() self.dense_reward = DENSE_REWARD if self.images: self.observation_space = obs.shape else: self.observation_space = Box(-0.3, 0.3, shape=obs.shape) self.gain = 1.05 self.goal = np.zeros((2, )) self.goal[0] = 0.25 self.goal[1] = 0 def step(self, action): action = process_action(action) self.sim.data.qvel[:] = 0 self.sim.data.ctrl[:] = action cur_obs = self._get_obs() constraint = int(self.sim.data.ncon > 3) if not constraint: for _ in range(500): self.sim.step() obs = self._get_obs() self.sim.data.qvel[:] = 0 self.steps += 1 constraint = int(self.sim.data.ncon > 3) self.done = self.steps >= self.horizon or constraint or ( self.get_distance_score() < GOAL_THRESH) if not self.dense_reward: reward = -(self.get_distance_score() > GOAL_THRESH).astype(float) else: reward = -self.get_distance_score() info = { "constraint": constraint, "reward": reward, "state": cur_obs, "next_state": obs, "action": action, "success": reward > -0.03 } return obs, reward, self.done, info def _get_obs(self, images=False): if images: return self.sim.render(64, 64, camera_name="cam0") # Joint poisitions and velocities state = np.concatenate( [self.sim.data.qpos[:].copy(), self.sim.data.qvel[:].copy()]) if not self.images and not images: return state[:2] # State is just (x, y) position # Get images ims = self.sim.render(64, 64, camera_name="cam0") return ims / 255 def reset(self, difficulty='h', check_constraint=True, pos=()): if len(pos): self.sim.data.qpos[0] = pos[0] self.sim.data.qpos[1] = pos[1] else: if difficulty is None: self.sim.data.qpos[0] = np.random.uniform(-0.27, 0.27) elif difficulty == 'e': self.sim.data.qpos[0] = np.random.uniform(0.14, 0.22) elif difficulty == 'm': self.sim.data.qpos[0] = np.random.uniform(-0.04, 0.04) elif difficulty == 'h': self.sim.data.qpos[0] = np.random.uniform(-0.22, -0.13) self.sim.data.qpos[1] = np.random.uniform(-0.22, 0.22) self.steps = 0 w1 = -0.08 w2 = 0.08 self.sim.model.geom_pos[5, 1] = 0.5 + w1 self.sim.model.geom_pos[7, 1] = -0.25 + w1 self.sim.model.geom_pos[6, 1] = 0.4 + w2 self.sim.model.geom_pos[8, 1] = -0.25 + w2 self.sim.forward() constraint = int(self.sim.data.ncon > 3) if constraint and check_constraint: if not len(pos): self.reset(difficulty) return self._get_obs() def get_distance_score(self): """ :return: mean of the distances between all objects and goals """ d = np.sqrt(np.mean((self.goal - self.sim.data.qpos[:])**2)) return d def expert_action(self): st = self.sim.data.qpos[:] if st[0] <= -0.151: delt = (np.array([-0.15, -0.125]) - st) elif st[0] <= 0.149: delt = (np.array([0.15, 0.125]) - st) else: delt = (np.array([self.goal[0], self.goal[1]]) - st) act = self.gain * delt return act
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip=1, xml_string=""): """ @param model_path path of the default model @param xml_string if given, the model will be reset using these values """ fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.model = load_model_from_path(fullpath) with open(fullpath, 'r') as f: self.model_xml = f.read() self.default_xml = self.model_xml if xml_string != "": self.model = load_model_from_xml(xml_string) self.model_xml = xml_string self.frame_skip = frame_skip self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self._viewers = {} self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([ o.size for o in observation ]) if type(observation) is tuple else observation.size high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() self.set_param_space() def get_params(self): """ Returns a dict of (param_name, param_value) """ return MujocoUpdater(self.model_xml).get_params() def set_params(self, params): """ @param params: dict(param_name, param_value) param_name should be a string of bodyname__type__paramname where type is either geom or joint, e.g. thigh__joint__friction, and param_value is a numpy array """ # invalidate cached properties self.__dict__.pop('action_space', None) self.__dict__.pop('observation_space', None) new_xml = MujocoUpdater.set_params(self.model_xml, params) self.__init__(xml_string=new_xml) self.reset() return self def set_param_space(self, param_space=None, eps_scale=0.5, replace=True): """ Set param_space @param param_space: dict(string, gym.space.base.Space) @param eps_scale: scale of variation applied to all params @param replace: if true, param_space overwrites default param_space. Default behavior is to merge. """ if param_space is not None: if replace: self._param_space = param_space else: self._param_space = {**self._param_space, **param_space} else: params = MujocoUpdater(self.model_xml).get_params() self._param_space = dict() for param, value in params.items(): eps = np.abs(value) * eps_scale ub = value + eps lb = value - eps for name in positive_params: if name in param: lb = np.clip(lb, 1e-3, ub) break space = spaces.Box(lb, ub) self._param_space[param] = space def get_geom_params(self, body_name): geom = MujocoUpdater(self.model_xml).get_geom(body_name) return { k: v for k, v in geom.attrib.items() if k not in MujocoUpdater.ignore_params } def get_joint_params(self, body_name): joint = MujocoUpdater(self.model_xml).get_joint(body_name) return { k: v for k, v in joint.attrib.items() if k not in MujocoUpdater.ignore_params } def get_body_names(self): return MujocoUpdater(self.model_xml).get_body_names() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.sim.data cdists = np.copy(self.model.geom_margin).flat for c in self.sim.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.sim.data.qpos.flat, self.sim.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.sim.data.qpos, self.sim.data.qvel, self.sim.data.qacc, self.sim.data.ctrl, ]).ravel() # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def reset(self, randomize=True): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob # Added for bayesian_rl def get_sim_state(self): return self.sim.get_state() # Added for bayesian_rl def set_sim_state(self, state): self.sim.set_state(state) # Added for bayesian_rl def set_state_vector(self, state_vector): qpos = state_vector[:self.model.nq] qvel = state_vector[self.model.nq:] self.set_state(qpos, qvel) # Added for bayesian_rl def get_state_vector(self): return self.state_vector() def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self, mode): self.viewer = self._viewers.get(mode) if self.viewer is None: if mode == 'human': self.viewer = mujoco_py.MjViewer(self.sim) elif mode == 'rgb_array' or mode == 'depth_array': self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) self.viewer_setup() self._viewers[mode] = self.viewer return self.viewer def close(self): if self.viewer is not None: # self.viewer.finish() self.viewer = None self._viewers = {} # def step(self, a): # return self._step(a) # Added for bayesian_rl def take_action(self, a): self.step(a) return self.get_sim_state() def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['mean'] o, r, d, _ = self.step(a) t = t + 1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1, :, :]) print(t, end=', ', flush=True) file_name = save_loc + filename + '_' + str(ep) + ".mp4" skvideo.io.vwrite(file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f" % (t1 - t0)) def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE): if mode == 'rgb_array': self._get_viewer(mode).render(width, height) # window size used for old mujoco-py: data = self._get_viewer(mode).read_pixels(width, height, depth=False) # original image is upside-down, so flip it return data[::-1, :, :] elif mode == 'depth_array': self._get_viewer(mode).render(width, height) # window size used for old mujoco-py: # Extract depth part of the read_pixels() tuple data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1] # original image is upside-down, so flip it return data[::-1, :] elif mode == 'human': self._get_viewer(mode).render()
class MujocoEnv(gym.Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise NotImplementedError("Mujoco file not specified") file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(suffix=".xml", text=True) with open(file_path, 'w') as f: f.write(content) self.model = load_model_from_path(file_path) os.close(tmp_f) else: self.model = load_model_from_path(file_path) self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self.render_width = 512 self.render_height = 512 self.render_camera = None self.init_qpos = self.sim.data.qpos self.init_qvel = self.sim.data.qvel self.init_qacc = self.sim.data.qacc self.init_ctrl = self.sim.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.frame_skip = 1 self.dcom = None self.current_com = None self.reset() super().__init__() @cached_property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return gym.spaces.Box(lb, ub, dtype=np.float32) @cached_property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return gym.spaces.Box(ub * -1, ub, dtype=np.float32) @property def action_bounds(self): assert isinstance(self.action_space, gym.spaces.Box) return self.action_space.low, self.action_space.high def reset_mujoco(self, init_state=None): self.sim.reset() if init_state is None: self.sim.data.qpos[:] = self.init_qpos + np.random.normal( size=self.init_qpos.shape) * 0.01 self.sim.data.qvel[:] = self.init_qvel + np.random.normal( size=self.init_qvel.shape) * 0.1 self.sim.data.qacc[:] = self.init_qacc self.sim.data.ctrl[:] = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.sim.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start:start + datum_dim] setattr(self.sim.data, datum_name, datum) start += datum_dim @overrides def reset(self, init_state=None): self.reset_mujoco(init_state) self.sim.forward() self.current_com = self.sim.data.subtree_com[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.sim.data cdists = np.copy(self.sim.model.geom_margin).flat for c in self.sim.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, data.cinert.flat, data.cvel.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.sim.data.qpos.flat, self.sim.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.sim.data.qpos, self.sim.data.qvel, self.sim.data.qacc, self.sim.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.sim.data.ctrl[:] = self.inject_action_noise(action) for _ in range(self.frame_skip): self.sim.step() self.sim.forward() new_com = self.sim.data.subtree_com[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(self.sim) return self.viewer def render(self, close=False, mode='human'): if mode == 'human': viewer = self.get_viewer() viewer.render() return None elif mode == 'rgb_array': img = self.sim.render(self.render_width, self.render_height, camera_name=self.render_camera) img = img[::-1, :, :] return img if close: self.stop_viewer() return None def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) functions.mj_deleteModel(self.sim._wrapped) functions.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): return self.data.get_body_xmat(body_name).reshape((3, 3)) def get_body_com(self, body_name): return self.data.get_body_xpos(body_name) def get_body_comvel(self, body_name): return self.data.get_body_xvelp(body_name) def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.sim.data.qpos)) def action_from_key(self, key): raise NotImplementedError
class BlockPickAndPlaceEnv(): def __init__(self, num_objects, num_colors, img_dim, include_z, random_initialize=False, view=False): # self.asset_path = os.path.join(os.getcwd(), '../mujoco_data/stl/') self.asset_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), '../mujoco_data/stl/') # self.asset_path = os.path.join(os.path.realpath(__file__), 'mujoco_data/stl/') # self.asset_path = '../mujoco_data/stl/' self.img_dim = img_dim self.include_z = include_z self.polygons = ['cube', 'horizontal_rectangle', 'tetrahedron'][:1] self.num_colors = num_colors self.num_objects = num_objects self.view = view #Hyper-parameters self.internal_steps_per_step = 2000 self.drop_height = 3 self.pick_height = 0.59 self.bounds = { 'x_min': -2.5, 'x_max': 2.5, 'y_min': 1.0, 'y_max': 4.0, 'z_min': 0.05, 'z_max': 2.2 } self.TOLERANCE = 0.2 self.wall_pos = np.array([2, 0, 0]) self.names = [] self.blocks = [] self._blank_observation = None if random_initialize: self.reset() ####Env initialization functions def get_unique_name(self, polygon): i = 0 while '{}_{}'.format(polygon, i) in self.names: i += 1 name = '{}_{}'.format(polygon, i) self.names.append(name) return name def add_mesh(self, polygon, pos, quat, rgba): name = self.get_unique_name(polygon) self.blocks.append({ 'name': name, 'polygon': polygon, 'pos': np.array(pos), 'quat': np.array(quat), 'rgba': rgba, 'material': name }) def get_asset_material_str(self): asset_base = '<material name="{}" rgba="{}" specular="0" shininess="0" emission="0.25"/>' asset_list = [ asset_base.format(a['name'], self.convert_to_str(a['rgba'])) for a in self.blocks ] asset_str = '\n'.join(asset_list) return asset_str def get_asset_mesh_str(self): asset_base = '<mesh name="{}" scale="0.6 0.6 0.6" file="{}"/>' asset_list = [ asset_base.format( a['name'], os.path.join(self.asset_path, a['polygon'] + '.stl')) for a in self.blocks ] asset_str = '\n'.join(asset_list) return asset_str def get_body_str(self): body_base = ''' <body name='{}' pos='{}' quat='{}'> <joint type='free' name='{}'/> <geom name='{}' type='mesh' mesh='{}' pos='0 0 0' quat='1 0 0 0' material='{}' condim='3' friction='1 1 1' solimp="0.998 0.998 0.001" solref="0.02 1"/> </body> ''' body_list = [ body_base.format(m['name'], self.convert_to_str(m['pos']), self.convert_to_str(m['quat']), m['name'], m['name'], m['name'], m['material']) for i, m in enumerate(self.blocks) ] body_str = '\n'.join(body_list) return body_str def convert_to_str(self, an_iterable): tmp = "" for an_item in an_iterable: tmp += str(an_item) + " " # tmp = " ".join(str(an_iterable)) return tmp[:-1] def get_random_pos(self, height=None): x = np.random.uniform(self.bounds['x_min'], self.bounds['x_max']) y = np.random.uniform(self.bounds['y_min'], self.bounds['y_max']) if height is None: z = np.random.uniform(self.bounds['z_min'], self.bounds['z_max']) else: z = height return np.array([x, y, z]) def get_random_rbga(self, num_colors): rgb = list(pickRandomColor(num_colors)) return rgb + [1] def initialize(self, use_cur_pos): tmp = MODEL_XML_BASE.format(self.get_asset_mesh_str(), self.get_asset_material_str(), self.get_body_str()) model = load_model_from_xml(tmp) self.sim = MjSim(model) self._blank_observation = self.get_observation() if self.view: self.viewer = MjViewer(self.sim) else: self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) # self.sim_state = self.sim.get_state() self._get_starting_step(use_cur_pos) def _get_starting_step(self, use_cur_pos): prev_positions = {} for i, aname in enumerate(self.names): if use_cur_pos: prev_positions[aname] = self.get_block_info(aname)["pos"] self.add_block(aname, [-5 + i, -5 + i, -5]) for aname in self.names: if use_cur_pos: tmp_pos = prev_positions[aname] # print(aname, tmp_pos) else: tmp_pos = self.get_random_pos(self.drop_height) self.add_block(aname, tmp_pos) for i in range(self.internal_steps_per_step): self.internal_step() if self.view: self.viewer.render() # self.sim_state = self.sim.get_state() ####Env internal step functions def add_block(self, ablock, pos): #pos (x,y,z) self.set_block_info(ablock, {"pos": pos}) # self.sim.set_state(self.sim_state) def pick_block(self, pos): block_name = None for a_block in self.names: if self.intersect(a_block, pos): block_name = a_block if block_name is None: return False #PICK_LOC = np.array([0, 0, 5]) #info = {"pos":PICK_LOC} #self.set_block_info(block_name, info) # self.sim.set_state(self.sim_state) return block_name def intersect(self, a_block, pos): cur_pos = self.get_block_info(a_block)["pos"] return np.max(np.abs(cur_pos - pos)) < self.TOLERANCE def get_block_info(self, a_block): info = {} info["poly"] = a_block[:-2] info["pos"] = np.copy(self.sim.data.get_body_xpos(a_block)) #np array info["quat"] = np.copy(self.sim.data.get_body_xquat(a_block)) info["vel"] = np.copy(self.sim.data.get_body_xvelp(a_block)) info["rot_vel"] = np.copy(self.sim.data.get_body_xvelr(a_block)) return info def set_block_info(self, a_block, info): # print(a_block, info) # print("Setting state: {}, {}".format(a_block, info)) sim_state = self.sim.get_state() start_ind = self.sim.model.get_joint_qpos_addr(a_block)[0] if "pos" in info: sim_state.qpos[start_ind:start_ind + 3] = np.array(info["pos"]) if "quat" in info: sim_state.qpos[start_ind + 3:start_ind + 7] = info["quat"] else: sim_state.qpos[start_ind + 3:start_ind + 7] = np.array( [1, 0, 0, 0]) start_ind = self.sim.model.get_joint_qvel_addr(a_block)[0] if "vel" in info: sim_state.qvel[start_ind:start_ind + 3] = info["vel"] else: sim_state.qvel[start_ind:start_ind + 3] = np.zeros(3) if "rot_vel" in info: sim_state.qvel[start_ind + 3:start_ind + 6] = info["rot_vel"] else: sim_state.qvel[start_ind + 3:start_ind + 6] = np.zeros(3) self.sim.set_state(sim_state) def internal_step(self, action=None): ablock = False if action is None: self.sim.forward() self.sim.step() else: pick_place = action[:3] drop_place = action[3:] ablock = self.pick_block(pick_place) if (ablock): # print("Dropping: {} {}".format(ablock, drop_place)) self.add_block(ablock, drop_place) # self.sim_state = self.sim.get_state() return ablock ####Env external step functions # Input: action (4) or (6) # Output: resultant observation after taking the action def step(self, action): action = self._pre_process_actions(action) ablock = self.internal_step(action) # print(self.get_env_info()) #if ablock: for i in range(self.internal_steps_per_step): self.sim.forward() self.sim.step() # self.internal_step() if self.view: self.viewer.render() # self.give_down_vel() # for i in range(200): # self.sim.forward() # self.sim.step() # self.sim_state = self.sim.get_state() # for aname in self.names: #This looks incorrect TODO: CHECK THIS # self.add_block(aname, self.get_block_info(aname)["pos"]) return self.get_observation() # Input: action can either be (A) or (T, A) where we want to execute T actions in a row # Output: Single obs def try_step(self, actions): tmp = self.get_env_info() # cur_state = copy.deepcopy(self.sim.get_state()) if len(actions.shape) == 1: self.step(actions) elif len(actions.shape) == 2: for action in actions: self.step(action) else: raise KeyError("Wrong shape for actions: {}".format(actions.shape)) obs = self.get_observation() # self.sim.set_state(cur_state) self.set_env_info(tmp) return obs def reset(self): self.names = [] self.blocks = [] quat = [1, 0, 0, 0] for i in range(self.num_objects): poly = np.random.choice(self.polygons) pos = self.get_random_pos() pos[2] = -2 * (i + 1) self.add_mesh(poly, pos, quat, self.get_random_rbga(self.num_colors)) self.initialize(False) return self.get_observation() def get_observation(self): img = self.sim.render( self.img_dim, self.img_dim, camera_name="fixed" ) # img is upside down, values btwn 0-255 (D,D,3) img = img[::-1, :, :] # flips image right side up (D,D,3) return np.ascontiguousarray(img) # values btwn 0-255 (D,D,3) def get_segmentation_masks(self): cur_obs = self.get_observation() tmp = np.abs(cur_obs - self._blank_observation) #(D,D,3) dif = np.where(tmp > 5, 1, 0).sum(2) #(D,D,3)->(D,D) dif = np.where(dif != 0, 1.0, 0.0) block_seg = dif background_seg = 1 - dif return np.array([background_seg, block_seg]) #Note: output btwn 0-1, (2,D,D) def get_obs_size(self): return [self.img_dim, self.img_dim] def get_actions_size(self): if self.include_z: return [6] else: return [4] # Inputs: actions (*,6) # Outputs: (*,6) if including z, (*,4) if not def _post_process_actions(self, actions): if self.include_z: return actions else: return actions[..., [0, 1, 3, 4]] # Inputs: actions (*,4), or (*,6) # Outputs: actions (*,6) def _pre_process_actions(self, actions): if actions.shape[-1] == 6: return actions full_actions = np.zeros(list(actions.shape)[:-1] + [6]) # (*,6) full_actions[..., [0, 1, 3, 4]] = actions full_actions[..., 2] = self.pick_height full_actions[..., 5] = self.drop_height return full_actions # Inputs: None # Outputs: Returns name of picked block # If self.include z: Pick any random block # Else: Picks a random block which can be picked up with the z pick set to self.pick_height def _get_rand_block_byz(self): if len(self.names) == 0: raise KeyError("No blocks in _get_rand_block_byz()!") if self.include_z: aname = np.random.choice(self.names) else: z_lim = self.pick_height tmp = [ aname for aname in self.names if abs(self.get_block_info(aname)["pos"][2] - z_lim) < self.TOLERANCE ] # while (len(tmp) == 0): # z_lim += 0.5 # tmp = [aname for aname in self.names if self.get_block_info(aname)["pos"][2] <= z_lim] aname = np.random.choice(tmp) # print(tmp, aname) return aname # Input: action_type # Output: Single action either (6) or (4) def sample_action(self, action_type=None, include_noise=True): if len(self.names) == 1 and action_type == 'place_block': action_type = None if action_type == 'pick_block': #pick block, place randomly # aname = np.random.choice(self.names) aname = self._get_rand_block_byz() pick = self.get_block_info(aname)["pos"] place = self.get_random_pos() elif action_type == 'place_block': #pick block, place on top of existing block # aname = np.random.choice(self.names) aname = self._get_rand_block_byz() pick = self.get_block_info(aname)["pos"] # + np.random.randn(3)/10 names = copy.deepcopy(self.names) names.remove(aname) aname = np.random.choice(names) place = self.get_block_info(aname)[ "pos"] # + np.random.randn(3)/10 place[5] += 2 # Each block is roughly 1 unit wide elif action_type == 'remove_block': aname = self._get_rand_block_byz() pick = self.get_block_info(aname)["pos"] # + np.random.randn(3)/50 place = [ 0, 0, -5 ] # Place the block under the ground to remove it from scene elif action_type is None: if self.include_z: pick = self.get_random_pos() place = self.get_random_pos() else: pick = self.get_random_pos(self.pick_height) place = self.get_random_pos(self.drop_height) else: raise KeyError("Wrong input action_type!") ac = np.array(list(pick) + list(place)) if include_noise: ac += np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 1.4 # pdb.set_trace() # ac[2] = 0.6 # ac[5] = self.drop_height return self._post_process_actions(ac) #Input: mean (*), std (*) #Output: actions (*) def sample_action_gaussian(self, mean, std): random_a = np.random.normal(mean, std) # # set pick height # random_a[2] = 0.6 # # set place heights # random_a[5] = self.drop_height return random_a # # Input: mean (*), std (*) # # Output: actions (*) # def sample_multiple_action_gaussian(self, mean, std, num_samples): # #mean and std should be (T, A) # random_a = np.random.normal(mean, std, [num_samples] + list(mean.shape)) # # set pick height # random_a[:, :, 2] = 0.6 # # set place height # random_a[:, :, 5] = self.drop_height # return random_a def move_blocks_side(self): # Move blocks to either side z = self.drop_height side_pos = [[-2.2, 1.5, z], [2.2, 1.5, z], [-2.2, 3.5, z], [2.2, 3.5, z]] # self.bounds = {'x_min':-2.5, 'x_max':2.5, 'y_min': 1.0, 'y_max' :4.0, 'z_min':0.05, 'z_max'2.2} place_lst = [] for i, block in enumerate(self.names): place = copy.deepcopy(self.get_block_info(block)["pos"]) place[-1] = self.drop_height self.add_block(block, side_pos[i]) place_lst.append(place) #true_actions.append(side_pos[i] + list(place)) #Note pick & places z's might be # slightly # off # sort by place height so place lowest block first for i in range(self.internal_steps_per_step): self.internal_step() if self.view: self.viewer.render() true_actions = [] for i, block in enumerate(self.names): pick = self.get_block_info(block)["pos"] pick[-1] = 0.6 place = place_lst[i] true_actions.append(np.concatenate([pick, place])) sorted(true_actions, key=lambda x: x[5]) # print(true_actions) return self._post_process_actions(true_actions) def create_tower_shape(self): def get_valid_width_pos(width): num_pos = len(self.heights) possible = [] for i in range(num_pos): valid = True for k in range(max(i - width, 0), min(i + width + 1, num_pos)): if self.types[k] == "tetrahedron": valid = False break if self.heights[i] < self.heights[k]: valid = False break if self.heights[i] >= 3: valid = False break if valid: possible.append(i) return possible def get_drop_pos(index): delta_x = 1 y_val = 3 left_most_x = -2.5 return [left_most_x + index * delta_x, y_val, 4] self.names = [] self.blocks = [] self.heights = [0, 0, 0, 0, 0] self.types = [None] * 5 self.check_clear_width = { 'cube': 1, 'horizontal_rectangle': 1, 'tetrahedron': 1 } self.add_height_width = { 'cube': 0, 'horizontal_rectangle': 1, 'tetrahedron': 0 } tmp_polygons = copy.deepcopy( self.polygons ) #['cube', 'horizontal_rectangle', 'tetrahedron'][:2] quat = [1, 0, 0, 0] for i in range(self.num_objects): poly = np.random.choice(tmp_polygons) tmp = get_valid_width_pos(self.check_clear_width[poly]) if len(tmp) == 0: tmp_polygons.remove(poly) if len(tmp_polygons) == 0: # print("DONE!") break else: continue tmp_polygons = copy.deepcopy(self.polygons) ind = np.random.choice(tmp) # print(poly, tmp, ind) self.update_tower_info(ind, poly) tmp_pos = get_drop_pos(ind) self.add_mesh(poly, tmp_pos, quat, self.get_random_rbga(self.num_colors)) self.num_objects = len(self.names) self.initialize(True) def update_tower_info(self, ind, poly): self.types[ind] = poly width = self.add_height_width[poly] new_height = self.heights[ind] + 1 for i in range(max(ind - width, 0), min(ind + width + 1, len(self.heights))): # print(i, new_height) self.heights[i] = new_height for i in range(1, 4): if self.heights[i - 1] == self.heights[ i + 1] and new_height == self.heights[i - 1]: self.heights[i] = self.heights[i - 1] # print(poly, ind, self.types, self.heights) def get_env_info(self): env_info = {} env_info["names"] = copy.deepcopy(self.names) env_info["blocks"] = copy.deepcopy(self.blocks) for i, aname in enumerate(self.names): info = self.get_block_info(aname) env_info["blocks"][i]["pos"] = copy.deepcopy(info["pos"]) env_info["blocks"][i]["quat"] = copy.deepcopy(info["quat"]) return env_info def set_env_info(self, env_info): self.names = env_info["names"] self.blocks = env_info["blocks"] self.initialize(True) def compute_accuracy(self, true_data, threshold=0.2): mjc_data = copy.deepcopy(true_data) max_err = -float('inf') data = self.get_env_info() for pred_datum in data['blocks']: err, mjc_match, err_pos, err_rgb = self._best_obj_match( pred_datum, mjc_data['blocks']) # del mjc_data[mjc_match] # print(err) if err > max_err: max_err = err max_pos = err_pos max_rgb = err_rgb if len(mjc_data) == 0: break correct = max_err < threshold return correct def _best_obj_match(self, pred, targs): def np_mse(x1, x2): return np.square(x1 - x2).mean() pos = pred['pos'] rgb = pred['rgba'] best_err = float('inf') for obj_data in targs: obj_name = obj_data['name'] obj_pos = obj_data['pos'] obj_rgb = obj_data['rgba'] pos_err = np_mse(pos, obj_pos) rgb_err = np_mse(np.array(rgb), np.array(obj_rgb)) err = pos_err + rgb_err if err < best_err: best_err = err best_obj = obj_name best_pos = pos_err best_rgb = rgb_err return best_err, best_obj, best_pos, best_rgb
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
class SimManager(object): """Object to generate sequence of images with their transforms""" def __init__(self, filepath, random_params={}, gpu_render=False, gui=False, display_data=False): self.model = load_model_from_path(filepath) self.sim = MjSim(self.model) self.filepath = filepath self.gui = gui self.display_data = display_data # Take the default random params and update anything we need self.RANDOM_PARAMS = {} self.RANDOM_PARAMS.update(random_params) if gpu_render: self.viewer = MjViewer(self.sim) else: self.viewer = None # Get start state of params to slightly jitter later self.START_GEOM_POS = self.model.geom_pos.copy() self.START_GEOM_SIZE = self.model.geom_size.copy() self.START_GEOM_QUAT = self.model.geom_quat.copy() self.START_BODY_POS = self.model.body_pos.copy() self.START_BODY_QUAT = self.model.body_quat.copy() self.START_MATID = self.model.geom_matid.copy() #self.FLOOR_OFFSET = self.model.body_pos[self.model.body_name2id('floor')] self.tex_modder = TextureModder(self.sim) self.tex_modder.whiten_materials( ) # ensures materials won't impact colors self.cam_modder = CameraModder(self.sim) self.light_modder = LightModder(self.sim) self.start_obj_pose = self.sim.data.get_joint_qpos( 'object:joint').copy() def get_data(self, num_images=10): """ Returns camera intrinsics, and a sequence of images, pose transforms, and camera transforms """ # randomize the scene self._rand_textures() self._rand_lights() self._rand_object() self._rand_walls() self._rand_distract() sequence = defaultdict(list) context = {} # object pose obj_pose, robot_pose = self._get_ground_truth() context["obj_world_pose"] = obj_pose context["robot_world_pose"] = robot_pose self._cam_step = 0 self._cam_choices = np.array([[-1.75, 0, 2], [-1.75, 0, 1.62], [-1.3, 1.7, 1.62]]) self._curr_cam_pos = self._cam_choices[0] for i in range(num_images): self._next_camera() self._forward() img = self._get_cam_frame() sequence["img"].append(img) cam_pos = self.cam_modder.get_pos("camera1") cam_quat = self.cam_modder.get_quat("camera1") cam_pose = np.concatenate([cam_pos, cam_quat]).astype(np.float32) sequence["cam_pose"].append(cam_pose) cam_id = self.cam_modder.get_camid("camera1") fovy = self.sim.model.cam_fovy[cam_id] width, height = 640, 480 f = 0.5 * height / math.tan(fovy * math.pi / 360) camera_intrinsics = np.array( ((f, 0, width / 2), (0, f, height / 2), (0, 0, 1))) context['cam_matrix'] = camera_intrinsics return context, sequence def _forward(self): """Advances simulator a step (NECESSARY TO MAKE CAMERA AND LIGHT MODDING WORK) And add some visualization""" self.sim.forward() if self.viewer and self.gui: # Get angle of camera and display it quat = np.quaternion(*self.model.cam_quat[0]) ypr = quaternion.as_euler_angles(quat) * 180 / np.pi cam_pos = self.model.cam_pos[0] cam_fovy = self.model.cam_fovy[0] #self.viewer.add_marker(pos=cam_pos, label="CAM: {}{}".format(cam_pos, ypr)) #self.viewer.add_marker(pos=cam_pos, label="CAM: {}".format(ypr)) #self.viewer.add_marker(pos=cam_pos, label="CAM: {}".format(cam_pos)) self.viewer.add_marker(pos=cam_pos, label="FOVY: {}, CAM: {}".format( cam_fovy, cam_pos)) self.viewer.render() def _get_ground_truth(self): """ Return the position to the robot, and quaternion to the robot quaternion 7 dim total """ robot_gid = self.sim.model.geom_name2id('base_link') obj_gid = self.sim.model.geom_name2id('object') # only x and y pos needed obj_world_pos = self.sim.data.geom_xpos[obj_gid] robot_world_pos = self.sim.data.geom_xpos[robot_gid] # obj_pos_in_robot_frame = (self.sim.data.geom_xpos[obj_gid] - self.sim.data.geom_xpos[robot_gid])[:2] # robot_quat = quaternion.as_quat_array(self.model.geom_quat[robot_gid].copy()) obj_world_quat = self.model.geom_quat[obj_gid].copy() robot_world_quat = self.model.geom_quat[robot_gid].copy() # # want quat of obj relative to robot frame # # obj_q = robot_q * localrot # # robot_q.inv * obj_q = localrot # rel_quat = quaternion.as_float_array(robot_quat.inverse() * obj_quat) # pose = np.concatenate([obj_pos_in_robot_frame, rel_quat]).astype(np.float32) obj_pose = self.sim.data.get_joint_qpos('object:joint').copy() robot_pose = np.concatenate([robot_world_pos, robot_world_quat]).astype(np.float32) return obj_pose, robot_pose def _get_cam_frame(self, ground_truth=None): """Grab an image from the camera (224, 244, 3) to feed into CNN""" #IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001) cam_img = self.sim.render( 640, 480, camera_name='camera1' )[::-1, :, :] # Rendered images are upside-down. # make camera crop be more like kinect #cam_img = self.sim.render(854, 480, camera_name='camera1')[::-1, 107:-107, :] # Rendered images are upside-down. #image_noise_variance = sample(IMAGE_NOISE_RVARIANCE) #cam_img = (skimage.util.random_noise(cam_img, mode='gaussian', var=image_noise_variance) * 255).astype(np.uint8) if self.display_data: print(ground_truth) #label = str(ground_truth[3:6]) display_image(cam_img, mode='preproc') #, label) # cam_img = preproc_image(cam_img) return cam_img def _randomize(self): self._rand_textures() self._rand_camera() self._rand_lights() #self._rand_robot() self._rand_object() self._rand_walls() self._rand_distract() def _rand_textures(self): """Randomize all the textures in the scene, including the skybox""" bright = np.random.binomial(1, 0.5) for name in self.sim.model.geom_names + ('skybox', ): self.tex_modder.rand_all(name) if bright: self.tex_modder.brighten(name, np.random.randint(0, 150)) def _rand_camera(self): """Randomize pos, orientation, and fov of camera real camera pos is -1.75, 0, 1.62 FOVY: Kinect2 is 53.8 ASUS is 45 https://www.asus.com/us/3D-Sensor/Xtion_PRO_LIVE/specifications/ http://smeenk.com/kinect-field-of-view-comparison/ """ # Params FOVY_R = Range(40, 50) #X = Range(-3, -1) #Y = Range(-1, 3) #Z = Range(1, 2) #C_R3D = Range3D(X, Y, Z) #cam_pos = sample_xyz(C_R3D) #L_R3D = rto3d([-0.1, 0.1]) C_R3D = Range3D([-0.07, 0.07], [-0.07, 0.07], [-0.07, 0.07]) ANG3 = Range3D([-3, 3], [-3, 3], [-3, 3]) # Look approximately at the robot, but then randomize the orientation around that cam_choices = np.array([[-1.75, 0, 1.62], [-1.3, 1.7, 1.62], [-1.75, 0, 2]]) cam_pos = cam_choices[np.random.choice(len(cam_choices))] # cam_pos = get_real_cam_pos(FLAGS.real_data_path) target_id = self.model.body_name2id(FLAGS.look_at) cam_off = 0 #sample_xyz(L_R3D) target_off = 0 #sample_xyz(L_R3D) quat = look_at(cam_pos + cam_off, self.sim.data.body_xpos[target_id] + target_off) quat = jitter_angle(quat, ANG3) #quat = jitter_quat(quat, 0.01) cam_pos += sample_xyz(C_R3D) self.cam_modder.set_quat('camera1', quat) self.cam_modder.set_pos('camera1', cam_pos) self.cam_modder.set_fovy('camera1', 60) # hard code to wide fovy def _next_camera(self): """Randomize pos, orientation, and fov of camera real camera pos is -1.75, 0, 1.62 FOVY: Kinect2 is 53.8 ASUS is 45 https://www.asus.com/us/3D-Sensor/Xtion_PRO_LIVE/specifications/ http://smeenk.com/kinect-field-of-view-comparison/ """ # Params FOVY_R = Range(40, 50) #X = Range(-3, -1) #Y = Range(-1, 3) #Z = Range(1, 2) #C_R3D = Range3D(X, Y, Z) #cam_pos = sample_xyz(C_R3D) #L_R3D = rto3d([-0.1, 0.1]) C_R3D = Range3D([-0.07, 0.07], [-0.07, 0.07], [-0.07, 0.07]) ANG3 = Range3D([-3, 3], [-3, 3], [-3, 3]) # Look approximately at the robot, but then randomize the orientation around that # linearly interpolate to the next camera every K steps K = 5 goal_cam_pos = self._cam_choices[(self._cam_step // K) + 1] offset = goal_cam_pos - self._curr_cam_pos offset *= (self._cam_step % K) / K self._curr_cam_pos += offset cam_pos = self._curr_cam_pos self._cam_step += 1 # cam_pos = cam_choices[np.random.choice(len(cam_choices))] # cam_pos = get_real_cam_pos(FLAGS.real_data_path) target_id = self.model.body_name2id(FLAGS.look_at) cam_off = 0 #sample_xyz(L_R3D) target_off = 0 #sample_xyz(L_R3D) quat = look_at(cam_pos + cam_off, self.sim.data.body_xpos[target_id] + target_off) quat = jitter_angle(quat, ANG3) #quat = jitter_quat(quat, 0.01) cam_pos += sample_xyz(C_R3D) self.cam_modder.set_quat('camera1', quat) self.cam_modder.set_pos('camera1', cam_pos) self.cam_modder.set_fovy('camera1', 60) # hard code to wide fovy def _rand_lights(self): """Randomize pos, direction, and lights""" # light stuff #X = Range(-1.5, 1.5) #Y = Range(-1.2, 1.2) #Z = Range(0, 2.8) X = Range(-1.5, -0.5) Y = Range(-0.6, 0.6) Z = Range(1.0, 1.5) LIGHT_R3D = Range3D(X, Y, Z) LIGHT_UNIF = Range3D(Range(0, 1), Range(0, 1), Range(0, 1)) # TODO: also try not altering the light dirs and just keeping them at like -1, or [0, -0.15, -1.0] for i, name in enumerate(self.model.light_names): lid = self.model.light_name2id(name) # random sample 80% of any given light being on if lid != 0: self.light_modder.set_active(name, sample([0, 1]) < 0.8) self.light_modder.set_dir(name, sample_light_dir()) self.light_modder.set_pos(name, sample_xyz(LIGHT_R3D)) #self.light_modder.set_dir(name, sample_xyz(rto3d([-1,1]))) #self.light_modder.set_specular(name, sample_xyz(LIGHT_UNIF)) #self.light_modder.set_diffuse(name, sample_xyz(LIGHT_UNIF)) #self.light_modder.set_ambient(name, sample_xyz(LIGHT_UNIF)) spec = np.array([sample(Range(0.5, 1))] * 3) diffuse = np.array([sample(Range(0.5, 1))] * 3) ambient = np.array([sample(Range(0.5, 1))] * 3) self.light_modder.set_specular(name, spec) self.light_modder.set_diffuse(name, diffuse) self.light_modder.set_ambient(name, ambient) #self.model.light_directional[lid] = sample([0,1]) < 0.2 self.model.light_castshadow[lid] = sample([0, 1]) < 0.5 def _rand_robot(self): """Randomize joint angles and jitter orientation""" jnt_shape = self.sim.data.qpos.shape self.sim.data.qpos[:] = sample_joints(self.model.jnt_range, jnt_shape) robot_gid = self.model.geom_name2id('robot_table_link') self.model.geom_quat[robot_gid] = jitter_quat( self.START_GEOM_QUAT[robot_gid], 0.01) def _rand_object(self): obj_gid = self.sim.model.geom_name2id('object') obj_bid = self.sim.model.geom_name2id('object') table_gid = self.model.geom_name2id('object_table') table_bid = self.model.body_name2id('object_table') obj_pose = self.start_obj_pose.copy() xval = self.model.geom_size[table_gid][ 0] #- self.model.geom_size[obj_gid][0] yval = self.model.geom_size[table_gid][ 1] #- self.model.geom_size[obj_gid][1] O_X = Range(-xval, xval) O_Y = Range(-yval, yval) O_Z = Range(0, 0) O_R3D = Range3D(O_X, O_Y, O_Z) newpos = obj_pose[:3] + sample_xyz(O_R3D) newquat = jitter_quat(obj_pose[3:], 0.1) obj_pose[:3] = newpos obj_pose[3:] = newquat self.sim.data.set_joint_qpos('object:joint', obj_pose) #T_X = Range(-0.1, 0.1) #T_Y = Range(-0.1, 0.1) #T_Z = Range(-0.1, 0.1) #T_R3D = Range3D(T_X, T_Y, T_Z) #self.model.body_pos[table_bid] = self.START_BODY_POS[table_bid] + sample_xyz(T_R3D) ## randomize orientation a wee bit #self.model.geom_quat[table_gid] = jitter_quat(self.START_GEOM_QUAT[table_gid], 0.01) def _rand_walls(self): wall_bids = { name: self.model.body_name2id(name) for name in ['wall_' + dir for dir in 'nesw'] } window_gid = self.model.geom_name2id('west_window') #floor_gid = self.model.geom_name2id('floor') WA_X = Range(-0.2, 0.2) WA_Y = Range(-0.2, 0.2) WA_Z = Range(-0.1, 0.1) WA_R3D = Range3D(WA_X, WA_Y, WA_Z) WI_X = Range(-0.1, 0.1) WI_Y = Range(0, 0) WI_Z = Range(-0.5, 0.5) WI_R3D = Range3D(WI_X, WI_Y, WI_Z) R = Range(0, 0) P = Range(-10, 10) Y = Range(0, 0) RPY_R = Range3D(R, P, Y) #self.model.geom_quat[floor_gid] = jitter_quat(self.START_GEOM_QUAT[floor_gid], 0.01) #self.model.geom_pos[floor_gid] = self.START_GEOM_POS[floor_gid] + [0,0,sample(-0.1,0.1) self.model.geom_quat[window_gid] = sample_quat(RPY_R) #self.model.geom_quat[window_gid] = jitter_quat(self.START_GEOM_QUAT[window_gid], 0.01) self.model.geom_pos[ window_gid] = self.START_GEOM_POS[window_gid] + sample_xyz(WI_R3D) for name in wall_bids: gid = wall_bids[name] self.model.body_quat[gid] = jitter_quat(self.START_BODY_QUAT[gid], 0.01) self.model.body_pos[gid] = self.START_BODY_POS[gid] + sample_xyz( WA_R3D) def _rand_distract(self): PREFIX = 'distract' geom_names = [ name for name in self.model.geom_names if name.startswith(PREFIX) ] # Size range SX = Range(0.01, 0.5) SY = Range(0.01, 0.9) SZ = Range(0.01, 0.5) S3D = Range3D(SX, SY, SZ) # Back range B_PX = Range(-0.5, 2) B_PY = Range(-1.5, 2) B_PZ = Range(0, 3) B_P3D = Range3D(B_PX, B_PY, B_PZ) # Front range F_PX = Range(-2, -0.5) F_PY = Range(-2, 1) F_PZ = Range(0, 0.5) F_P3D = Range3D(F_PX, F_PY, F_PZ) for name in geom_names: gid = self.model.geom_name2id(name) range = B_P3D if np.random.binomial(1, 0.5) else F_P3D self.model.geom_pos[gid] = sample_xyz(range) self.model.geom_quat[gid] = random_quat() self.model.geom_size[gid] = sample_xyz(S3D, mode='logspace') self.model.geom_type[gid] = sample_geom_type() self.model.geom_rgba[gid][-1] = np.random.binomial(1, 0.5) def _set_visible(self, prefix, range_top, visible): """Helper function to set visibility of several objects""" if not visible: if range_top == 0: name = prefix gid = self.model.geom_name2id(name) self.model.geom_rgba[gid][-1] = 0.0 for i in range(range_top): name = "{}{}".format(prefix, i) gid = self.model.geom_name2id(name) self.model.geom_rgba[gid][-1] = 0.0 else: if range_top == 0: name = prefix gid = self.model.geom_name2id(name) self.model.geom_rgba[gid][-1] = 1.0 for i in range(range_top): name = "{}{}".format(prefix, i) gid = self.model.geom_name2id(name) self.model.geom_rgba[gid][-1] = 1.0
class DeterministicGraspPolicy(Policy): def __init__(self, agentparams, policyparams): Policy.__init__(self) self.agentparams = agentparams self.min_lift = agentparams.get('min_z_lift', 0.05) self.policyparams = policyparams self.adim = agentparams['adim'] self.n_actions = policyparams['nactions'] if 'num_samples' in self.policyparams: self.M = self.policyparams['num_samples'] else: self.M = 20 # number of CEM Samples if 'best_to_take' in self.policyparams: self.K = self.policyparams['best_to_take'] else: self.K = 5 # best samples to average for next sampling assert self.adim >= 4, 'must have at least x, y, z + gripper actions' self.moveto = True self.drop = False self.grasp = False self.lift = False if 'iterations' in self.policyparams: self.niter = self.policyparams['iterations'] else: self.niter = 10 # number of iterations self.imgs = [] self.iter = 0 def setup_CEM_model(self, t, init_model): if t == 0: if 'gen_xml' in self.agentparams: self.CEM_model = MjSim( load_model_from_path(self.agentparams['gen_xml_fname'])) else: self.CEM_model = MjSim( load_model_from_path(self.agentparams['filename'])) self.initial_qpos = init_model.data.qpos.copy() self.initial_qvel = init_model.data.qvel.copy() self.reset_CEM_model() def reset_CEM_model(self): if len(self.imgs) > 0: print('saving iter', self.iter, 'with frames:', len(self.imgs)) npy_to_gif( self.imgs, os.path.join(self.agentparams['record'], 'iter_{}'.format(self.iter))) self.iter += 1 sim_state = self.CEM_model.get_state() sim_state.qpos[:] = self.initial_qpos.copy() sim_state.qvel[:] = self.initial_qvel.copy() self.CEM_model.set_state(sim_state) self.prev_target = self.CEM_model.data.qpos[:self.adim].squeeze().copy( ) self.target = self.CEM_model.data.qpos[:self.adim].squeeze().copy() for _ in range(5): self.step_model(self.target) self.imgs = [] def viewer_refresh(self): if 'debug_viewer' in self.policyparams and self.policyparams[ 'debug_viewer']: large_img = self.CEM_model.render( 640, 480, camera_name="maincam")[::-1, :, :] self.imgs.append(large_img) def perform_CEM(self, targetxy): self.reset_CEM_model() if 'object_meshes' in self.agentparams: targetxy = self.CEM_model.data.sensordata[:2].squeeze().copy() print('Beginning CEM') ang_dis_mean = self.policyparams['init_mean'].copy() ang_dis_cov = self.policyparams['init_cov'].copy() scores = np.zeros(self.M) best_score, best_ang, best_xy = -1, None, None for n in range(self.niter): ang_disp_samps = np.random.multivariate_normal(ang_dis_mean, ang_dis_cov, size=self.M) for s in range(self.M): #print('On iter', n, 'sample', s) self.reset_CEM_model() move = True drop = False grasp = False g_start = 0 lift = False angle_delta = ang_disp_samps[s, 0] targetxy_delta = targetxy + ang_disp_samps[s, 1:] print('init iter') print(targetxy) print(angle_delta, targetxy_delta) for t in range(self.n_actions): angle_action = np.zeros(self.adim) cur_xy = self.CEM_model.data.qpos[:2].squeeze() if move and np.linalg.norm( targetxy_delta - cur_xy, 2) <= self.policyparams['drop_thresh']: move = False drop = True if drop and self.CEM_model.data.qpos[2] <= -0.079: drop = False grasp = True g_start = t if grasp and t - g_start > 2: grasp = False lift = True if move: angle_action[:2] = targetxy_delta angle_action[2] = self.agentparams['ztarget'] angle_action[3] = angle_delta angle_action[4] = -100 elif drop: angle_action[:2] = targetxy_delta angle_action[2] = -0.08 angle_action[3] = angle_delta angle_action[4] = -100 elif grasp: angle_action[:2] = targetxy_delta angle_action[2] = -0.08 angle_action[3] = angle_delta angle_action[4] = 21 elif lift: angle_action[:2] = targetxy_delta angle_action[2] = self.agentparams['ztarget'] angle_action[3] = angle_delta angle_action[4] = 21 self.step_model(angle_action) # print 'final z', self.CEM_model.data.qpos[8].squeeze(), 'with angle', angle_samps[s] if 'object_meshes' in self.agentparams: obj_z = self.CEM_model.data.sensordata[2].squeeze() else: obj_z = self.CEM_model.data.qpos[8].squeeze() print('obj_z at iter', n * self.M + s, 'is', obj_z) scores[s] = obj_z if 'stop_iter_thresh' in self.policyparams and scores[ s] > self.policyparams['stop_iter_thresh']: return ang_disp_samps[s, 0], ang_disp_samps[s, 1:] # print 'score',scores[s] best_scores = np.argsort(-scores)[:self.K] if scores[best_scores[0]] > best_score or best_ang is None: #print('best', scores[best_scores[0]]) best_score = scores[best_scores[0]] best_ang = ang_disp_samps[best_scores[0], 0] best_xy = ang_disp_samps[best_scores[0], 1:] ang_dis_mean = np.mean(ang_disp_samps[best_scores, :], axis=0) ang_dis_cov = np.cov(ang_disp_samps[best_scores, :].T) return best_ang, best_xy def step_model(self, input_actions): pos_clip = self.agentparams['targetpos_clip'] self.prev_target = self.target.copy() self.target = input_actions.copy() self.target = np.clip(self.target, pos_clip[0], pos_clip[1]) for s in range(self.agentparams['substeps']): step_action = s / float(self.agentparams['substeps']) * ( self.target - self.prev_target) + self.prev_target self.CEM_model.data.ctrl[:] = step_action self.CEM_model.step() self.viewer_refresh() #print "end", self.CEM_model.data.qpos[:4].squeeze() def act(self, traj, t, init_model=None, goal_object_pose=None, hyperparams=None, goal_image=None): """ Scripted pick->place->wiggle trajectory. There's probably a better way to script this but booleans will suffice for now. """ self.setup_CEM_model(t, init_model) if t == 0: self.moveto = True self.drop = False self.lift = False self.grasp = False self.second_moveto = False self.final_drop = False self.wiggle = False self.switchTime = 0 print('start pose', traj.Object_pose[t, 0, :3]) self.targetxy = traj.Object_pose[t, 0, :2] self.angle, self.disp = self.perform_CEM(self.targetxy) print('best angle', self.angle, 'best target', self.targetxy) self.targetxy += self.disp traj.desig_pos = np.zeros((2, 2)) traj.desig_pos[0] = self.targetxy.copy() if self.lift and traj.X_full[t, 2] >= self.min_lift: self.lift = False if traj.Object_full_pose[t, 0, 2] > -0.01: #lift occursed self.second_moveto = True self.targetxy = np.random.uniform(-0.2, 0.2, size=2) print("LIFTED OBJECT!") print('dropping at', self.targetxy) traj.desig_pos[1] = self.targetxy.copy() else: self.wiggle = True if self.grasp and self.switchTime > 0: print('lifting at time', t, '!', 'have z', traj.X_full[t, 2]) self.grasp = False self.lift = True if self.drop and (traj.X_full[t, 2] <= -0.079 or self.switchTime >= 2): print('grasping at time', t, '!', 'have z', traj.X_full[t, 2]) self.drop = False self.grasp = True self.switchTime = 0 if self.moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy, 2) <= self.policyparams['drop_thresh']): if self.switchTime > 0: print('stopping moveto at time', t, '!') print(traj.X_full[t, :2], self.targetxy) self.moveto = False self.drop = True self.switchTime = 0 else: self.switchTime += 1 if self.second_moveto and np.linalg.norm( traj.X_full[t, :2] - self.targetxy, 2) <= self.policyparams['drop_thresh']: self.second_moveto = False self.final_drop = True self.switchTime = 0 actions = np.zeros(self.adim) if self.moveto or self.second_moveto: delta = np.zeros(3) delta[:2] = self.targetxy - traj.target_qpos[t, :2] if 'xyz_std' in self.policyparams: delta += self.policyparams['xyz_std'] * np.random.normal( size=3) norm = np.sqrt(np.sum(np.square(delta))) if norm > self.policyparams['max_norm']: delta *= self.policyparams['max_norm'] / norm actions[:3] = traj.target_qpos[t, :3] + delta actions[2] = self.agentparams['ztarget'] actions[3] = self.angle if self.moveto: actions[-1] = -1 else: actions[-1] = 1 self.switchTime += 1 elif self.drop: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 elif self.lift: actions[:2] = self.targetxy actions[2] = self.agentparams['ztarget'] actions[3] = self.angle actions[-1] = 1 elif self.grasp: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = 1 self.switchTime += 1 elif self.final_drop: if self.switchTime > 0: print('opening') actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 if self.switchTime > 1: print('up') actions[:2] = self.targetxy actions[2] = self.agentparams['ztarget'] + np.random.uniform( -0.03, 0.05) actions[3] = self.angle + np.random.uniform( -np.pi / 4., np.pi / 4.) actions[-1] = -1 self.final_drop = False self.wiggle = True self.switchTime = 0 else: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 elif self.wiggle: delta_vec = np.random.normal(size=2) norm = np.sqrt(np.sum(np.square(delta_vec))) if norm > self.policyparams['max_norm']: delta_vec *= self.policyparams['max_norm'] / norm actions[:2] = np.clip(traj.target_qpos[t, :2] + delta_vec, -0.15, 0.15) delta_z = np.random.uniform(-0.08 - traj.target_qpos[t, 2], 0.3 - traj.target_qpos[t, 2]) actions[2] = traj.target_qpos[t, 2] + delta_z actions[3] = traj.target_qpos[t, 3] + np.random.uniform(-0.1, 0.1) if np.random.uniform() > 0.5: actions[4] = -1 else: actions[4] = 1 if 'angle_std' in self.policyparams: actions[3] += self.policyparams['angle_std'] * np.random.normal() return actions - traj.target_qpos[t, :] * traj.mask_rel
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass def evaluate_success(self, paths, logger=None): """ Log various success metrics calculated based on input paths into the logger """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640,480), mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
</mujoco> """ model = load_model_from_xml(MODEL_XML) sim = MjSim(model) print("Nicely exposed function:\n") print(sim.model.get_xml()) print("\nversus MuJoCo internals:\n\n") functions.mj_saveLastXML("/tmp/saved.xml", model, "", 0) with open("/tmp/saved.xml", "r") as f: print(f.read()) sim.render(100, 100) modelpos = np.zeros(3) modelquat = np.zeros(4) roompos = np.ones(3) roomquat = np.array([1., 0., 1., 0.]) functions.mjv_room2model(modelpos, modelquat, roompos, roomquat, sim.render_contexts[0].scn) print("\n\nAnother internal function, mjv_room2model:") print("modelpos = %s, modelquat = %s" % (str(modelpos), str(modelquat))) res = np.zeros(9) functions.mju_quat2Mat(res, roomquat) print("\n\nAnother internal function, mju_quat2Mat:\n%s" % res)
pixcount = pixcount + 1 #else : #grayimg[i][j] = 0 position = np.array([pixwx / pixcount, pixwy / pixcount]) return position while True: sim.set_state(sim_state) #for j in range(0,6) : sim.data.ctrl[:] = 0.0 #for i in range (0, 2000) : #sim.data.ctrl[j] = -3.14159 + (i * 6.283)/2000.0 sim.step() viewer.render() img = sim.render(256, 256, camera_name="cam3", depth=True) print(type(img)) print(img[1]) print(np.max(img[1])) print(np.min(img[1])) kk = img[1] #kk = #kk * 1600.0 ''' for i in range (0,256): for j in range(0,256): #if kk[i][j] > 256: val = (kk[i][j] - 0.5945638) /(0.99736613 -0.5945638) *1500 if val > 255: val =255 kk[i][j] = val
def take_images(self, filename, obj, writer, img_idx=0, debug=False): model = load_model_from_path(filename) sim = MjSim(model) modder= TextureModder(sim) # viewer=MjViewer(sim) # this f*****g line has caused me so much pain. embedding = np.append(obj.type, obj.geom.reshape(-1)) if obj.type == 4 or obj.type == 5: # MULTI CABINET: get double the params. axis1, door1 = transform_param(obj.params[0][0],obj.params[0][1], obj) axis2, door2 = transform_param(obj.params[1][0],obj.params[1][1], obj) axes = np.append(axis1,axis2) doors = np.append(door1,door2) params = np.append(axes,doors) set_two_door_control(sim, 'cabinet2' if obj.type == 4 else 'refrigerator') n_qpos_variables = 2 else: n_qpos_variables = 1 if obj.type == 1: sim.data.ctrl[0] = 0.05 elif obj.geom[3] == 1: sim.data.ctrl[0] = -0.2 else: sim.data.ctrl[0] = 0.2 params = get_cam_relative_params2(obj) # if 1DoF, params is length 10. If 2DoF, params is length 20. embedding_and_params = np.concatenate((embedding, params, obj.pose, obj.rotation)) # print('nqpos', n_qpos_variables) # print(self.img_idx, obj.pose) # print(embedding_and_params.shape) t = 0 while t<4000: sim.forward() sim.step() if t%250==0: ######################### IMG_WIDTH = calibrations.sim_width IMG_HEIGHT = calibrations.sim_height ######################### img, depth = sim.render(IMG_WIDTH, IMG_HEIGHT, camera_name='external_camera_0', depth=True) depth = vertical_flip(depth) real_depth = buffer_to_real(depth, 12.0, 0.1) norm_depth = real_depth / 12.0 if self.masked: # remove background mask = norm_depth > 0.99 norm_depth = (1-mask)*norm_depth if self.debugging: # save image to disk for visualization # img = cv2.resize(img, (IMG_WIDTH,IMG_HEIGHT)) img = vertical_flip(img) img = white_bg(img) imgfname = os.path.join(self.savedir, 'img'+str(self.img_idx).zfill(6)+'.png') depth_imgfname = os.path.join(self.savedir, 'depth_img'+str(self.img_idx).zfill(6)+'.png') integer_depth = norm_depth * 255 cv2.imwrite(imgfname, img) cv2.imwrite(depth_imgfname, integer_depth) # noise_img = sp_noise(img,0.1) # cv2.imwrite(imgfname, noise_img) # noise_integer_depth = sp_noise(integer_depth,0.1) # cv2.imwrite(depth_imgfname, noise_integer_depth) # if IMG_WIDTH != 192 or IMG_HEIGHT != 108: # depth = cv2.resize(norm_depth, (192,108)) depthfname = os.path.join(self.savedir,'depth'+str(self.img_idx).zfill(6) + '.pt') torch.save(torch.tensor(norm_depth.copy()), depthfname) row = np.append(embedding_and_params, sim.data.qpos[:n_qpos_variables]) # print(row.shape) writer.writerow(row) self.img_idx += 1 t += 1
class JuggleEnv: def __init__(self): self.control_freq: float = 50.0 self.control_timestep: float = 1.0 / self.control_freq self.viewer = None self.horizon = 1000 self.target = np.array([0.8, 0.0, 1.9]) # load model self.robot: Robot = None self.arena: Arena = None self.pingpong: MujocoGeneratedObject = None self.model: MujocoWorldBase = None self._load_model() # initialize simulation self.mjpy_model = None self.sim: MjSim = None self.model_timestep: float = 0.0 self._initialize_sim() # reset robot, object and internel variables self.cur_time: float = 0.0 self.timestep: int = 0.0 self.done: bool = False self._pingpong_body_id: int = -1 self._paddle_body_id: int = -1 self._reset_internel() # internel variable for scoring self._below_plane = False self.plane_height = 1.5 def _load_model(self): # Load the desired controller's default config as a dict controller_config = load_controller_config( default_controller="JOINT_VELOCITY") controller_config["output_max"] = 1.0 controller_config["output_min"] = -1.0 robot_noise = {"magnitude": [0.05] * 7, "type": "gaussian"} self.robot = SingleArm( robot_type="IIWA", idn=0, controller_config=controller_config, initial_qpos=[0.0, 0.7, 0.0, -1.4, 0.0, -0.56, 0.0], initialization_noise=robot_noise, gripper_type="PaddleGripper", gripper_visualization=True, control_freq=self.control_freq) self.robot.load_model() self.robot.robot_model.set_base_xpos([0, 0, 0]) self.arena = EmptyArena() self.arena.set_origin([0.8, 0, 0]) self.pingpong = BallObject(name="pingpong", size=[0.02], rgba=[0.8, 0.8, 0, 1], solref=[0.1, 0.03], solimp=[0, 0, 1], density=100) pingpong_model = self.pingpong.get_collision() pingpong_model.append( new_joint(name="pingpong_free_joint", type="free")) pingpong_model.set("pos", "0.8 0 2.0") # merge into one self.model = MujocoWorldBase() self.model.merge(self.robot.robot_model) self.model.merge(self.arena) self.model.worldbody.append(pingpong_model) def _initialize_sim(self): # if we have an xml string, use that to create the sim. Otherwise, use the local model self.mjpy_model = self.model.get_model(mode="mujoco_py") # Create the simulation instance and run a single step to make sure changes have propagated through sim state self.sim = MjSim(self.mjpy_model) self.sim.step() self.robot.reset_sim(self.sim) self.model_timestep = self.sim.model.opt.timestep def _reset_internel(self): # reset robot self.robot.setup_references() self.robot.reset(deterministic=False) # reset pingpong pingpong_pos = self.target + np.random.rand(3) * 0.08 - 0.04 pingpong_quat = np.array([1.0, 0.0, 0.0, 0.0]) self.sim.data.set_joint_qpos( "pingpong_free_joint", np.concatenate([pingpong_pos, pingpong_quat])) # get handle for important parts self._pingpong_body_id = self.sim.model.body_name2id("pingpong") self._paddle_body_id = self.sim.model.body_name2id( "gripper0_paddle_body") # Setup sim time based on control frequency self.cur_time = 0 self.timestep = 0 self.done = False def reset(self): self.sim.reset() self._reset_internel() self.sim.forward() return self._get_observation() def _get_observation(self): di = OrderedDict() # get robot observation di = self.robot.get_observations(di) # get pingpong observation pingpong_pos = np.array( self.sim.data.body_xpos[self._pingpong_body_id]) di["pingpong_pos"] = pingpong_pos return di def step(self, action: np.ndarray): if self.done: raise ValueError("executing action in terminated episode") policy_step = True score = 0.0 for _ in range(int(self.control_timestep / self.model_timestep)): self.sim.forward() self.robot.control(action=action, policy_step=policy_step) # self.sim.data.ctrl[:] = action*5.0 self.sim.step() policy_step = False # check if the ball pass the plane h = self.sim.data.body_xpos[self._pingpong_body_id][2] self._below_plane |= h < self.plane_height if self._below_plane and h > self.plane_height: score = 1.0 self._below_plane = False self.timestep += 1 self.cur_time += self.control_timestep observation = self._get_observation() dist_xy = np.linalg.norm( (observation["robot0_eef_pos"] - observation["pingpong_pos"])[:2]) # paddle_height = observation["robot0_eef_pos"][2] self.done = self.timestep >= self.horizon or dist_xy > 0.2 reward = score # + 0 * (0.2 - dist_xy) return observation, reward, self.done, {} def render(self, mode="human"): if mode == "human": self._get_viewer().render() elif mode == "rgb_array": img = self.sim.render(1920, 1080) return img[::-1, :, ::-1] def _get_viewer(self): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer.vopt.geomgroup[0] = 0 self.viewer._hide_overlay = True return self.viewer def close(self): self._destroy_viewer() def _destroy_viewer(self): if self.viewer is not None: glfw.destroy_window(self.viewer.window) self.viewer = None def seed(self): pass
</mujoco> """ model = load_model_from_xml(MODEL_XML) sim = MjSim(model) print("Nicely exposed function:\n") print(sim.model.get_xml()) print("\nversus MuJoCo internals:\n\n") functions.mj_saveLastXML("/tmp/saved.xml", model, "", 0) with open("/tmp/saved.xml", "r") as f: print(f.read()) sim.render(100, 100) modelpos = np.zeros(3) modelquat = np.zeros(4) roompos = np.ones(3) roomquat = np.array([1., 0., 1., 0.]) functions.mjv_room2model(modelpos, modelquat, roompos, roomquat, sim.render_contexts[0].scn) print("\n\nAnother internal function, mjv_room2model:") print("modelpos = %s, modelquat = %s" % (str(modelpos), str(modelquat))) res = np.zeros(9) functions.mju_quat2Mat(res, roomquat)