class PendulumWithGoals(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30
    }

    def __init__(self, goal_reaching_thresholds=np.array([0.075, 0.075, 0.75]),
                 goal_not_reached_penalty=-1, goal_reached_reward=0, terminate_on_goal_reaching=True,
                 time_limit=1000, frameskip=1, random_goals_instead_of_standing_goal=False,
                 polar_coordinates: bool=False):
        super().__init__()
        dir = os.path.dirname(__file__)
        model = load_model_from_path(dir + "/pendulum_with_goals.xml")

        self.sim = MjSim(model)
        self.viewer = None
        self.rgb_viewer = None

        self.frameskip = frameskip
        self.goal = None
        self.goal_reaching_thresholds = goal_reaching_thresholds
        self.goal_not_reached_penalty = goal_not_reached_penalty
        self.goal_reached_reward = goal_reached_reward
        self.terminate_on_goal_reaching = terminate_on_goal_reaching
        self.time_limit = time_limit
        self.current_episode_steps_counter = 0
        self.random_goals_instead_of_standing_goal = random_goals_instead_of_standing_goal
        self.polar_coordinates = polar_coordinates

        # spaces definition
        self.action_space = spaces.Box(low=-self.sim.model.actuator_ctrlrange[:, 1],
                                       high=self.sim.model.actuator_ctrlrange[:, 1],
                                       dtype=np.float32)
        if self.polar_coordinates:
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(low=np.array([-np.pi, -15]),
                                          high=np.array([np.pi, 15]),
                                          dtype=np.float32),
                "desired_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                           high=np.array([np.pi, 15]),
                                           dtype=np.float32),
                "achieved_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                            high=np.array([np.pi, 15]),
                                            dtype=np.float32)
            })
        else:
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(low=np.array([-1, -1, -15]),
                                          high=np.array([1, 1, 15]),
                                          dtype=np.float32),
                "desired_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                           high=np.array([1, 1, 15]),
                                           dtype=np.float32),
                "achieved_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                            high=np.array([1, 1, 15]),
                                            dtype=np.float32)
            })

        self.spec = EnvSpec('PendulumWithGoals-v0')
        self.spec.reward_threshold = self.goal_not_reached_penalty * self.time_limit

        self.reset()

    def _goal_reached(self):
        observation = self._get_obs()
        if np.any(np.abs(observation['achieved_goal'] - observation['desired_goal']) > self.goal_reaching_thresholds):
            return False
        else:
            return True

    def _terminate(self):
        if (self._goal_reached() and self.terminate_on_goal_reaching) or \
                        self.current_episode_steps_counter >= self.time_limit:
            return True
        else:
            return False

    def _reward(self):
        if self._goal_reached():
            return self.goal_reached_reward
        else:
            return self.goal_not_reached_penalty

    def step(self, action):
        self.sim.data.ctrl[:] = action
        for _ in range(self.frameskip):
            self.sim.step()

        self.current_episode_steps_counter += 1

        state = self._get_obs()

        # visualize the angular velocities
        state_velocity = np.copy(state['observation'][-1] / 20)
        goal_velocity = self.goal[-1] / 20
        self.sim.model.site_size[2] = np.array([0.01, 0.01, state_velocity])
        self.sim.data.mocap_pos[2] = np.array([0.85, 0, 0.75 + state_velocity])
        self.sim.model.site_size[3] = np.array([0.01, 0.01, goal_velocity])
        self.sim.data.mocap_pos[3] = np.array([1.15, 0, 0.75 + goal_velocity])

        return state, self._reward(), self._terminate(), {}

    def _get_obs(self):

        """
        y

        ^
        |____
        |   /
        |  /
        |~/
        |/
        --------> x

        """

        # observation
        angle = self.sim.data.qpos
        angular_velocity = self.sim.data.qvel
        if self.polar_coordinates:
            observation = np.concatenate([angle - np.pi, angular_velocity])
        else:
            x = np.sin(angle)
            y = np.cos(angle)  # qpos is the angle relative to a standing pole
            observation = np.concatenate([x, y, angular_velocity])

        return {
            "observation": observation,
            "desired_goal": self.goal,
            "achieved_goal": observation
        }

    def reset(self):
        self.current_episode_steps_counter = 0

        # set initial state
        angle = np.random.uniform(np.pi / 4, 7 * np.pi / 4)
        angular_velocity = np.random.uniform(-0.05, 0.05)
        self.sim.data.qpos[0] = angle
        self.sim.data.qvel[0] = angular_velocity
        self.sim.step()

        # goal
        if self.random_goals_instead_of_standing_goal:
            angle_target = np.random.uniform(-np.pi / 8, np.pi / 8)
            angular_velocity_target = np.random.uniform(-0.2, 0.2)
        else:
            angle_target = 0
            angular_velocity_target = 0

        # convert target values to goal
        x_target = np.sin(angle_target)
        y_target = np.cos(angle_target)
        if self.polar_coordinates:
            self.goal = np.array([angle_target - np.pi, angular_velocity_target])
        else:
            self.goal = np.array([x_target, y_target, angular_velocity_target])

        # visualize the goal
        self.sim.data.mocap_pos[0] = [x_target, 0, y_target]

        return self._get_obs()

    def render(self, mode='human', close=False):
        if mode == 'human':
            if self.viewer is None:
                self.viewer = MjViewer(self.sim)
            self.viewer.render()
        elif mode == 'rgb_array':
            if self.rgb_viewer is None:
                self.rgb_viewer = MjRenderContextOffscreen(self.sim, 0)
            self.rgb_viewer.render(500, 500)
            # window size used for old mujoco-py:
            data = self.rgb_viewer.read_pixels(500, 500, depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
Example #2
0
    )
    world.mode = 'human'
    world.reset()
    print(dir(world.mujoco_arena))
    print(world.mujoco_arena.table_full_size)
    print(world.mujoco_arena.bin_abs)

    print(type(world.sim))
    sim = world.sim
    print(dir(sim))
    viewer = MjRenderContextOffscreen(sim, 0)
    print(dir(viewer.scn))
    set_camera_birdview(viewer)
    width, height = 516, 386
    viewer.render(width, height)
    image = np.asarray(viewer.read_pixels(width, height, depth=False)[:, :, :],
                       dtype=np.uint8)
    depth = np.asarray((viewer.read_pixels(width, height, depth=True)[1]))
    # find center depth, this is a hack
    cdepth = depth[height // 2, width // 2]
    print(cdepth)
    depth[depth > cdepth] = cdepth
    seg = depth != cdepth
    seg[:, :padding], seg[:, -padding:] = False, False
    seg[:t_padding, :], seg[-t_padding:, :] = False, False
    cv2.imwrite('test_dataset/seg.png', seg * 255)
    # normalize the depth
    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth[:, :padding], depth[:, -padding:] = 0, 0
    depth[:t_padding, :], depth[-t_padding:, :] = 0, 0
    np.save('test_dataset/depth_0.npy', depth)
Example #3
0
class FetchPushEnv(FetchEnv):
    def __init__(self, rendersetting, objnum=4, reward_type='sparse'):
        initial_qpos = {
            'robot0:slide0': 0.405,
            'robot0:slide1': 0.48,
            'robot0:slide2': 0.0,
        }

        FetchEnv.__init__(self,
                          MODEL_XML_PATH,
                          has_object=True,
                          block_gripper=True,
                          n_substeps=20,
                          gripper_extra_height=0.0,
                          target_in_the_air=False,
                          target_offset=0.0,
                          obj_range=0.15,
                          target_range=0.15,
                          distance_threshold=0.05,
                          initial_qpos=initial_qpos,
                          reward_type=reward_type)
        utils.EzPickle.__init__(self)

        self.rendermode = {}
        self._viewer_setup(rendersetting)
        self.objnum = objnum

        self.objs = [
            TableObj("object{}:joint".format(i), self.model)
            for i in range(self.objnum)
        ]

        self.movedict = {
            "dimidx": [
                np.array([1.0, 0.0]),  #x - pos
                np.array([-1.0, 0.0]),  #x - neg
                np.array([0.0, 1.0]),  #y - pos,
                np.array([0.0, -1.0])
            ],  #y - neg 
        }

    def _viewer_setup(self, rendersetting):
        self.rendermode["rendertype"] = rendersetting["render_flg"]

        if rendersetting[
                "render_flg"] == True:  # in-screen: ignore other mode parameters
            self.viewer = MjViewer(self.sim)
        else:  # off-screen:
            self.rendermode["W"] = rendersetting["screenwidth"]
            self.rendermode["H"] = rendersetting["screenhight"]
            self.rendermode["pltswitch"] = rendersetting["plt_switch"]
            self.viewer = MjRenderContextOffscreen(self.sim)
            if plt.get_fignums():
                plt.close()
            self.fig, self.ax = plt.subplots()

        # body_id = self.sim.model.body_name2id('robot0:gripper_link')
        # lookat = self.sim.data.body_xpos[body_id]
        # for idx, value in enumerate(lookat):
        #     self.viewer.cam.lookat[idx] = value
        self.viewer.cam.lookat[:] = rendersetting["lookat"]
        self.viewer.cam.distance = rendersetting["distance"]
        self.viewer.cam.azimuth = rendersetting["azimuth"]
        self.viewer.cam.elevation = rendersetting["elevation"]

    def offscreenrender(self):
        self.viewer.render(self.rendermode["H"], self.rendermode["W"])
        im_src = self.viewer.read_pixels(self.rendermode["H"],
                                         self.rendermode["W"],
                                         depth=False)
        im_src = np.flip(im_src)
        im_src = np.flip(im_src, axis=1)
        im_src = np.flip(im_src, axis=2)

        if self.rendermode["pltswitch"]:
            self.ax.cla()
            self.ax.imshow(im_src)
            plt.pause(1e-10)

        return im_src, not plt.get_fignums()

    def move_gripper(self, newpos):
        # Move end effector into position.
        gripper_target = np.array(newpos)
        gripper_rotation = np.array([1., 0., 1., 0.])
        self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
        self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
        for _ in range(10):
            self.sim.step()
            self.viewer.render()
            # self.offscreenrender()

    def mesh_init_cubes(self, gridsize=0.4):
        def mesh_random(pt_num, swing=0.375, seglow=-0.7, seghigh=0.8):
            res = []
            last_pt = seglow - swing
            for remain_pt_num in reversed(range(pt_num)):
                segrange = [last_pt + swing, seghigh - remain_pt_num * swing]
                last_pt = np.random.uniform(low=segrange[0], high=segrange[1])
                res.append(last_pt)
            return np.random.permutation(res)

        self.x_coord = mesh_random(self.objnum, swing=gridsize)
        self.y_coord = mesh_random(self.objnum, swing=gridsize)

        for i in range(self.objnum):
            pos = [self.x_coord[i], self.y_coord[i]]
            self.objs[i].set_pos(pos, self.sim)
        self.sim.forward()

    def random_push(self, savepath):
        video_data = np.zeros((2, self.rendermode["H"], self.rendermode["W"],
                               3)).astype(np.uint8)
        move_fashion, move_cube = np.random.randint(4, size=2)

        im1, _ = self.offscreenrender()
        self.push_obj(move_fashion, move_cube)
        im2, _ = self.offscreenrender()

        video_data[0, ...] = im1
        video_data[1, ...] = im2
        np.savez_compressed(savepath,
                            video=video_data,
                            cubeindx=move_cube,
                            actionindx=move_fashion)

    def push_obj(self, move_fashion, move_cube):

        objpos = np.array(self.objs[move_cube].get_pos())
        dimidx = self.movedict["dimidx"][move_fashion]

        objpos += np.append(dimidx * 0.06, [0.4])
        # objpos[1]+= 0.07
        # objpos[2]+= 0.4
        self.move_gripper(objpos)

        objpos[2] -= 0.38
        self.move_gripper(objpos)

        for _ in range(10):
            self._set_action(np.append(dimidx * -0.4, [0.0, 0.0]))
            # self._set_action(np.array([0.0,-0.4,0,0]))
            self.sim.step()
            # self.viewer.render()
            # self.offscreenrender()

        for _ in range(10):
            self._set_action(np.append(dimidx * 0.1, [0.0, 0.0]))
            # self._set_action(np.array([0.0,0.1,0,0]))
            self.sim.step()
            self.viewer.render()
            # self.offscreenrender()

        objpos[2] += 0.7
        self.move_gripper(objpos)

    def reset(self):
        self._reset_sim()
        self.mesh_init_cubes()
        self.move_gripper([1.2, 0.75, 0.8])
Example #4
0
class ImiRob:
    """ imitate robot that imitates(replay) the actions extracted from other robots
        This robot simulator is of the quadruped robob which has 4 legs along with 2 hinge joints for each leg.
        This simulation is based on mujoco. All robots are derivation of OpenAI gym ant.
    """

    def __init__(self,rendersetting,frame_skip=4,objnum=4,showarm=False):
        if showarm:
            xmlsource = '/home/cml/CML/MjPushData/xmls/show_env.xml'
        else:
            xmlsource = '/home/cml/CML/MjPushData/xmls/lab_env.xml'

        self.model =  load_model_from_path(xmlsource)
        self.sim = MjSim(self.model)
        self.frame_skip = frame_skip
        self.rendermode = {}
        self.init_state = self.sim.get_state()

        self.viewer = None
        self.setrendermode(rendersetting)
        self.stopflag = False
        self.objnum = objnum
        self.showarm = showarm

   

        self.cubes = [TableObj(["free_x_"+str(i+1),"free_y_"+str(i+1)],self.model) for i in range(self.objnum)]
        self.move_dict = {
            0:[0.3,0],
            1:[-0.3,0],
            2:[0,0.3,],
            3:[0,-0.3,]
        }
  
    
    def init_cubes(self,mjqpos):
        for i in range(self.objnum):
            pos = [np.random.uniform(low=-1.0, high=1.0),np.random.uniform(low=-1.0, high=1.0)]
            # pos =[i*0.3,i*0.3]
            mjqpos = self.cubes[i].set_pos(pos,mjqpos)
        
        return mjqpos
    
    def mesh_init_cubes(self,mjqpos,gridsize=0.5):
        def mesh_random(pt_num,swing=0.5,seglow=-1.0,seghigh=1.0):
            res = []
            last_pt = seglow - swing
            for remain_pt_num in reversed(range(pt_num)):
                segrange = [last_pt+swing,seghigh - remain_pt_num*swing]
                last_pt = np.random.uniform(low=segrange[0],high=segrange[1])
                res.append(last_pt)
            return np.random.permutation(res)
        
        self.x_coord = mesh_random(self.objnum,swing=gridsize)
        self.y_coord = mesh_random(self.objnum,swing=gridsize)
        
        for i in range(self.objnum):
            pos = [self.x_coord[i],self.y_coord[i]]
            mjqpos = self.cubes[i].set_pos(pos,mjqpos)
        
        return mjqpos
    
    def check_contacts(self):
        def checkdistance(poses,scaling=0.6,eps=0.1):
            poses = sorted(poses)
            dis = np.array([np.abs(poses[i]-poses[i+1]) for i in range(len(poses)-1)])
            return np.any(dis < eps)

        cube_pos_x = [self.cubes[i].pos[0] for i in range(self.objnum)]
        cube_pos_y = [self.cubes[i].pos[1] for i in range(self.objnum)]
        return checkdistance(cube_pos_x) or checkdistance(cube_pos_y)

        





    def setrendermode(self,rendersetting):#render_flg, plt_switch, screenwidth = 500, screenhight = 500, interval = 5):
        self.rendermode["rendertype"]=rendersetting["render_flg"]
        if rendersetting["render_flg"] == True: # in-screen: ignore other mode parameters
            self.viewer = MjViewer(self.sim)
            return
        else:                  # off-screen:
            self.rendermode["W"] = rendersetting["screenwidth"]
            self.rendermode["H"] = rendersetting["screenhight"]
            self.rendermode["pltswitch"] = rendersetting["plt_switch"]
            self.viewer = MjRenderContextOffscreen(self.sim)
            if plt.get_fignums():
                plt.close()
            self.fig,self.ax = plt.subplots()

            return 
    
    
    
    def onscreenshow(self):
        self.viewer.render()
    
    def offscreenshow(self):
        self.viewer.render(self.rendermode["H"],self.rendermode["W"])
        im_src = self.viewer.read_pixels(self.rendermode["H"],self.rendermode["W"],depth=False)
        im_src = np.flip(im_src)
        im_src = np.flip(im_src, axis = 1)
        im_src = np.flip(im_src, axis = 2)


        if self.rendermode["pltswitch"]:
            self.ax.cla()
            self.ax.imshow(im_src)
            plt.pause(1e-10)

        return im_src, not plt.get_fignums()

    
    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        old_state = self.sim.get_state()
        new_state = MjSimState(old_state.time, qpos, qvel,
                                         old_state.act, old_state.udd_state)
        self.sim.set_state(new_state)
        self.sim.forward()

    def reset(self):
        self.sim.set_state(self.init_state)
        self.stopflag = False

        self.qposrecrd = []

        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()
        qpos = self.init_qpos.copy()
        qvel = self.init_qvel.copy()
  
        if self.showarm:
            qpos = np.array([0.2, -90 / 180 * math.pi, 70 / 180 * math.pi,
               0, 0, 0, # robotic arm
               0, 0, 0, 0,
               0, 0, 0, 0, # two fingers
               -0.62, 0.32,
               -0.72, 0.38,
               -0.835, 0.425,
               -0.935, 0.46,]) # 4 cubes

 

        qpos = self.mesh_init_cubes(qpos)
        self.set_state(qpos, qvel)



    def setcam(self, distance=3, azimuth=0, elevation=-90, lookat=[-0.2,0,0], hideoverlay=True,trackbodyid=-1):
        self.viewer.cam.trackbodyid = trackbodyid
        self.viewer.cam.distance=distance
        self.viewer.cam.azimuth=azimuth
        self.viewer.cam.elevation=elevation
        self.viewer.cam.lookat[:]=lookat
        self.viewer._hide_overlay=hideoverlay

    def moverobot(self):
        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()
        qpos = self.init_qpos.copy()
        qvel = self.init_qvel.copy()

        qpos=self.cubes[0].move_cube([0.2,0],qpos)


        self.set_state(qpos, qvel)
    
    def random_move_cube(self):
        move_fashion, move_cube = np.random.randint(4,size=2)
        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()
        qpos = self.init_qpos.copy()
        qvel = self.init_qvel.copy()
        qpos=self.cubes[move_cube].move_cube(self.move_dict[move_fashion],qpos)
        self.set_state(qpos, qvel)

        return move_cube,move_fashion

    def save_video(self,savepath):
        video_data = np.zeros((2,self.rendermode["H"],self.rendermode["W"],3)).astype(np.uint8)
        im1,_ = self.offscreenshow()
        movecube,movefashion = self.random_move_cube()
        im2,_ = self.offscreenshow()
        video_data[0,...] = im1
        video_data[1,...] = im2

        np.savez_compressed(savepath,video=video_data,cubeindx=movecube,actionindx=movefashion)

    def __del__(self):
        del self.viewer
        del self.model
        del self.sim