class MobileRobotX(SRLGymEnv):
    """


    """
    def __init__(self,
                 renders=False,
                 is_discrete=True,
                 name="mobile_robot",
                 max_distance=1.6,
                 shape_reward=False,
                 record_data=False,
                 srl_model="raw_pixels",
                 random_target=False,
                 state_dim=-1,
                 verbose=False,
                 save_path='srl_zoo/data/',
                 env_rank=0,
                 srl_pipe=None,
                 img_shape=None,
                 **_):
        super(MobileRobotX, self).__init__(srl_model=srl_model,
                                           relative_pos=RELATIVE_POS,
                                           env_rank=env_rank,
                                           srl_pipe=srl_pipe)

        self._renders = renders
        self.img_shape = img_shape  # expected channel-first, i.e. (3, height, width)
        if self.img_shape is None:
            self._width = RENDER_WIDTH
            self._height = RENDER_HEIGHT
        else:
            self._height, self._width = self.img_shape[1:]

        self._max_distance = max_distance
        self._shape_reward = shape_reward
        self._random_target = random_target

        self._is_discrete = is_discrete
        self.state_dim = state_dim
        self.relative_pos = RELATIVE_POS
        self.saver = None
        self.verbose = verbose
        self.max_steps = MAX_STEPS
        self.robot_pos = np.zeros(2)
        self.target_pos = np.zeros(2)
        # Boundaries of the square env
        self._min_x, self._max_x = 0, 4
        self._min_y, self._max_y = 0, 4
        self.collision_margin = 0.1

        self._observation = []
        self._env_step_counter = 0
        self.has_bumped = False  # Used for handling collisions
        self.imgs_folder = "mobilerobot"  # one of: 'mobilerobot', 'omnirobot'
        self.robot_img = cv2.imread(
            "./environments/mobile_robot_extreme/images/{}/robot_224.png".
            format(self.imgs_folder))
        self.target_img = cv2.imread(
            "./environments/mobile_robot_extreme/images/{}/target_224.png".
            format(self.imgs_folder))
        self.background_img = cv2.imread(
            "./environments/mobile_robot_extreme/images/{}/background_224.png".
            format(self.imgs_folder))

        robot_img_shape = self.robot_img.shape
        self.robot_img = cv2.resize(
            self.robot_img, (int(robot_img_shape[1] * self._width / 224),
                             int(robot_img_shape[0] * self._height / 224)))
        target_img_shape = self.target_img.shape
        self.target_img = cv2.resize(
            self.target_img, (int(target_img_shape[1] * self._width / 224),
                              int(target_img_shape[0] * self._height / 224)))
        self.background_img = cv2.resize(self.background_img,
                                         (self._width, self._height))

        # cv2.imshow("robot", self.robot_img)
        # k = cv2.waitKey(0)
        # if k == ord("q"):
        #     raise KeyboardInterrupt
        # self.walls = None
        self.srl_model = srl_model

        if record_data:
            self.saver = EpisodeSaver(name,
                                      max_distance,
                                      state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=False,
                                      path=save_path)

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        else:
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(2, ),
                                           dtype=np.float32)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()

        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self._height,
                                                       self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=np.float32)

    def getTargetPos(self):
        # Return only the [x, y] coordinates
        return self.target_pos

    def getGroundTruthDim(self):
        """
        Convention for the ground truth (GT): under the random target setting,
        the GT also includes the target position, hence two extra dimensions.
        :return: (int)
        """
        # Not a @staticmethod here (unlike the other envs), since the
        # dimension depends on self._random_target
        if self._random_target:
            return 4
        return 2

    def getRobotPos(self):
        # Return only the [x, y] coordinates
        return self.robot_pos

    def getGroundTruth(self):
        """
        Convention for the ground truth (GT): under the random target setting,
        the GT should also include the target position. A cleaner long-term fix
        would be to update srl_env.py and the other environments accordingly.
        """
        robot_pos = self.getRobotPos()
        if self._random_target and not self.relative_pos:
            # Check 'envs.observation_space' in rl_baselines/base_classes.py
            # (before self.model.learn)
            target_pos = self.getTargetPos()
            return np.concatenate([robot_pos, target_pos], axis=0)
        # With relative_pos, the offset to the target is applied in
        # getSRLState(), so only the robot position is returned here
        return robot_pos

    def reset(self):
        self._env_step_counter = 0
        self.has_bumped = False
        # Init the robot randomly
        x_start = self._max_x / 2 + self.np_random.uniform(
            -self._max_x / 3, self._max_x / 3)
        y_start = self._max_y / 2 + self.np_random.uniform(
            -self._max_y / 3, self._max_y / 3)

        self.robot_pos = np.array([x_start, y_start])

        # Initialize target position
        x_pos = 0.8 * self._max_x
        y_pos = 0.7 * self._max_y
        if self._random_target:
            margin = self.collision_margin * self._max_x + 0.08  # 0.4 + 0.08
            x_pos = self.np_random.uniform(self._min_x + margin,
                                           self._max_x - margin)
            y_pos = self.np_random.uniform(self._min_y + margin,
                                           self._max_y - margin)

        self.target_pos = np.array([x_pos, y_pos])

        self._observation = self.getObservation()

        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(),
                             self.getRobotPos())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return self._observation

    def getObservation(self):
        """
        :return: (numpy array)
        """
        self._observation = self.render("rgb_array")
        return self._observation

    def step(self, action):
        self._env_step_counter += 1
        # True if the robot has bumped against a wall during this step
        self.has_bumped = False
        # Base displacement, with Gaussian noise added to the action
        dv = DELTA_POS + self.np_random.normal(0.0, scale=NOISE_STD)
        if self._is_discrete:
            dx = [-dv, dv, 0, 0][action]
            dy = [0, 0, -dv, dv][action]
            real_action = np.array([dx, dy])
        else:
            # Scale action amplitude between -dv and dv
            real_action = np.clip(action, -1, 1) * dv

        previous_pos = self.robot_pos.copy()
        self.robot_pos += real_action
        # Handle collisions
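        # Note: the arena is square (_max_x == _max_y), so pairing axis 0 (x)
        # with _max_y and axis 1 (y) with _max_x in the zip below is harmless
        # here, though it would matter for a rectangular arena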
        for i, (limit, robot_dim) in enumerate(
                zip([self._max_y, self._max_x], [ROBOT_WIDTH, ROBOT_LENGTH])):
            margin = self.collision_margin * limit + robot_dim / 2 + 0.02
            # If it has bumped against a wall, stay at the previous position
            if self.robot_pos[i] <= margin or self.robot_pos[i] >= limit - margin:
                self.has_bumped = True
                self.robot_pos = previous_pos
                break
        # Update mobile robot position on image

        self._observation = self.getObservation()

        # calculate the reward of this step
        distance = np.linalg.norm(self.getTargetPos() - self.robot_pos, 2)
        reward = 0
        if distance <= REWARD_DIST_THRESHOLD:
            reward = 1
        # Negative reward when it bumps into a wall
        if self.has_bumped:
            reward = -1
        if self._shape_reward:
            reward = -distance

        done = self._env_step_counter > self.max_steps

        if self.saver is not None:
            self.saver.step(self._observation, action, reward, done,
                            self.getRobotPos())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation), reward, done, {}

        return np.array(self._observation), reward, done, {}

    def render(self, mode='rgb_array'):
        """
        :param mode: (str) ignored, the frame is always returned as an RGB array
        :return: (np.ndarray) RGB image
        """
        image = self.background_img.copy()

        for obj_pos, obj_img in zip([self.target_pos, self.robot_pos],
                                    [self.target_img, self.robot_img]):
            obj_img_pos = np.array([(obj_pos[1] / self._max_y) * self._height,
                                    (obj_pos[0] / self._max_x) * self._width],
                                   dtype=int)
            h_st = obj_img_pos[0] - obj_img.shape[0] // 2
            h_ed = obj_img.shape[0] + h_st
            w_st = obj_img_pos[1] - obj_img.shape[1] // 2
            w_ed = obj_img.shape[1] + w_st
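            # Composite the sprite over the background: pixels whose mean
            # intensity is below 240 belong to the sprite, near-white pixels
            # are treated as transparent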
            mask = (np.mean(obj_img, axis=-1) < 240)
            image[h_st:h_ed, w_st:w_ed, :][mask] = obj_img[mask]

        if self._renders:
            cv2.imshow(
                "Keep pressing (any key) to display or 'q'/'Esc' to quit.",
                image)
            key = cv2.waitKey(0)
            if key == ord('q') or key == 27:  # 'q' or 'Esc'
                cv2.destroyAllWindows()
                raise KeyboardInterrupt
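        # OpenCV images are BGR; reverse the channel axis to return RGB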
        return image[..., ::-1]

    def interactive(self, show_map=False):
        """
        Teleoperate the robot from the keyboard: 8/up, 2/down, 4/left,
        6/right (numpad or arrow keys); 'Esc' or 'q' quits.
        """
        image = self.getObservation()
        cv2.imshow("image", image[..., ::-1])
        # Map key codes to discrete actions (81-84 are OpenCV arrow-key codes)
        key_to_action = {ord("4"): 0, 81: 0,   # left
                         ord("6"): 1, 83: 1,   # right
                         ord("8"): 2, 82: 2,   # up
                         ord("2"): 3, 84: 3}   # down
        while True:
            k = cv2.waitKey(0)
            if k == 27 or k == ord("q"):  # press 'Esc' or 'q' to quit
                break
            elif k in key_to_action:
                action = key_to_action[k]
                image, reward, done, _ = self.step(action)
                print("Action: {}, Reward: {}, Done: {}".format(
                    action, reward, done))
                if show_map:
                    print(self.map)
                cv2.imshow("image", image[..., ::-1])
            else:
                print("You are pressing the key: {}".format(k))
Example #2
class RoboboEnv(SRLGymEnv):
    """
    Robobo robot Environment (Gym wrapper for Robobo Gazebo environment)
    The goal of robobo is to go to the location on the table
    (signaled with a circle sticker on the table)
    :param renders: (bool) Whether to display the GUI or not
    :param is_discrete: (bool) true if action space is discrete vs continuous
    :param log_folder: (str) name of the folder where recorded data will be stored
    :param state_dim: (int) dimension of the state when learning states with an SRL model
    :param learn_states: (bool) whether the states should be learned (passed to the EpisodeSaver)
    :param record_data: (bool) Set to true, record frames with the rewards.
    :param shape_reward: (bool) Set to true, reward = -distance_to_goal
    :param env_rank: (int) the number ID of the environment
    :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model
    """
    def __init__(self,
                 renders=False,
                 is_discrete=True,
                 log_folder="robobo_log_folder",
                 state_dim=-1,
                 learn_states=False,
                 srl_model="raw_pixels",
                 record_data=False,
                 shape_reward=False,
                 env_rank=0,
                 srl_pipe=None,
                 img_shape=None):

        super(RoboboEnv, self).__init__(srl_model=srl_model,
                                        relative_pos=RELATIVE_POS,
                                        env_rank=env_rank,
                                        srl_pipe=srl_pipe)
        self.n_contacts = 0
        use_ground_truth = srl_model == 'ground_truth'
        use_srl = srl_model != 'raw_pixels'
        self.use_srl = use_srl or use_ground_truth
        self.use_ground_truth = use_ground_truth
        self.use_joints = False
        self.relative_pos = RELATIVE_POS
        self._is_discrete = is_discrete
        self.observation = []
        # Start simulation with first observation
        self._env_step_counter = 0
        self.episode_terminated = False
        self.state_dim = state_dim
        self._renders = renders
        self._shape_reward = shape_reward
        self.cuda = th.cuda.is_available()
        self.target_pos = None
        self.saver = None
        if img_shape is None:
            self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH)
        else:
            self.img_shape = img_shape

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        else:
            action_dim = 2
            self._action_bound = 1
            action_bounds = np.array([self._action_bound] * action_dim)
            self.action_space = spaces.Box(-action_bounds,
                                           action_bounds,
                                           dtype=np.float32)
        # SRL model
        if self.use_srl:
            if use_ground_truth:
                self.state_dim = self.getGroundTruthDim()
            self.dtype = np.float32
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=self.dtype)
        else:
            self.dtype = np.uint8
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.img_shape[1],
                                                       self.img_shape[2], 3),
                                                dtype=self.dtype)

        if record_data:
            print("Recording data...")
            self.saver = EpisodeSaver(log_folder,
                                      0,
                                      self.state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=learn_states)

        # Connect to the Gym bridge ROS node:
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.PAIR)
        self.socket.connect("tcp://{}:{}".format(HOSTNAME, SERVER_PORT))

        # note: if the connection takes too long, start the client first, then the server
        print("Waiting for server connection...")
        msg = self.socket.recv_json()
        print("Connected to server (received message: {})".format(msg))

        self.action = [0, 0]
        self.reward = 0
        self.robobo_pos = np.array([0, 0])

        # Initialize the state
        if self._renders:
            self.image_plot = None

    def step(self, action):
        """
        :action: (int)
        :return: (tensor (np.ndarray)) observation, int reward, bool done, dict extras)
        """
        assert self.action_space.contains(action)
        # Forward the action to the server (the server performs the actual motion)
        self.action = action
        self._env_step_counter += 1

        # Send the action to the server
        self.socket.send_json({"command": "action", "action": self.action})

        # Receive state data (position, etc), important to update state related values
        self.getEnvState()

        #  Receive a camera image from the server
        self.observation = self.getObservation()
        done = self._hasEpisodeTerminated()
        if self.saver is not None:
            self.saver.step(self.observation, action, self.reward, done,
                            self.getGroundTruth())
        if self.use_srl:
            return self.getSRLState(self.observation), self.reward, done, {}
        else:
            return self.observation, self.reward, done, {}

    def getEnvState(self):
        """
        Returns a dictionary containing info about the environment state.
        :return: (dict) state_data containing data related to the state: target_pos,
        robobo_pos and reward.
        """
        state_data = self.socket.recv_json()
        self.reward = state_data["reward"]
        self.target_pos = np.array(state_data["target_pos"])
        self.robobo_pos = np.array(state_data["position"])

        return state_data

    def getObservation(self):
        """
        Receive the observation image using a socket
        :return: (numpy ndarray) observation
        """
        # Receive a camera image from the server
        self.observation = recvMatrix(self.socket)
        # Resize it:
        self.observation = cv2.resize(self.observation,
                                      (self.img_shape[2], self.img_shape[1]),
                                      interpolation=cv2.INTER_AREA)
        return self.observation

    def getTargetPos(self):
        """
        :return (numpy array): Position of the target (circle sticker)
        """
        return self.target_pos

    @staticmethod
    def getGroundTruthDim():
        """
        :return: (int)
        """
        return 2

    def getGroundTruth(self):
        """
        Alias for getRoboboPos for compatibility between envs
        :return: (numpy array)
        """
        return np.array(self.getRoboboPos())

    def getRoboboPos(self):
        """
        :return: (np.ndarray) Position (x, y) of the Robobo robot
        """
        return self.robobo_pos

    def reset(self):
        """
        Reset the environment
        :return: (numpy ndarray) first observation of the env
        """
        self.episode_terminated = False
        # Step count since episode start
        self._env_step_counter = 0
        self.socket.send_json({"command": "reset"})
        # Update state related variables, important step to get both data and
        # metadata that allow reading the observation image
        self.getEnvState()
        self.observation = self.getObservation()
        if self.saver is not None:
            self.saver.reset(self.observation, self.getTargetPos(),
                             self.getGroundTruth())
        if self.use_srl:
            return self.getSRLState(self.observation)
        else:
            return self.observation

    def _hasEpisodeTerminated(self):
        """
        Returns True if the episode is over and False otherwise
        """
        return self.episode_terminated or self._env_step_counter > MAX_STEPS

    def closeServerConnection(self):
        """
        To be called at the end of running the program, externally
        """
        print("Robobo_env client exiting and closing socket...")
        self.socket.send_json({"command": "exit"})
        self.socket.close()

    def render(self, mode='rgb_array'):
        """
        :param mode: (str)
        :return: (numpy array) BGR image
        """
        if mode != "rgb_array":
            print('render in human mode not yet supported')
            return np.array([])

        if self._renders:
            plt.ion()  # needed for interactive update
            if self.image_plot is None:
                plt.figure('Robobo RL')
                self.image_plot = plt.imshow(bgr2rgb(self.observation),
                                             cmap='gray')
                self.image_plot.axes.grid(False)
            else:
                self.image_plot.set_data(bgr2rgb(self.observation))
            plt.draw()
            # Wait a bit, so that plot is visible
            plt.pause(0.0001)
        return self.observation
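
# Minimal usage sketch (an assumption, not part of the original file): the
# constructor blocks until the Gym bridge server answers on
# tcp://HOSTNAME:SERVER_PORT, so the server must already be running.
def _demo_robobo(n_steps=5):
    env = RoboboEnv(renders=False, is_discrete=True)
    obs = env.reset()
    for _ in range(n_steps):
        obs, _reward, _done, _ = env.step(env.action_space.sample())
    env.closeServerConnection()
    return obs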
Example #3
class InmoovButtonGymEnv(SRLGymEnv):
    """
    Gym wrapper for Kuka environment with a push button
    :param urdf_root: (str) Path to pybullet urdf files
    :param renders: (bool) Whether to display the GUI or not
    :param is_discrete: (bool) Whether to use discrete or continuous actions
    :param multi_view :(bool) if TRUE -> returns stacked images of the scene on 6 channels (two cameras)
    :param name: (str) name of the folder where recorded data will be stored
    :param max_distance: (float) Max distance between end effector and the button (for negative reward)
    :param action_repeat: (int) Number of timesteps an action is repeated (here it is equivalent to frameskip)
    :param shape_reward: (bool) Set to true, reward = -distance_to_goal
    :param action_joints: (bool) Set actions to apply to the joint space
    :param record_data: (bool) Set to true, record frames with the rewards.
    :param random_target: (bool) Set the button position to a random position on the table
    :param force_down: (bool) Set Down as the only vertical action allowed
    :param state_dim: (int) dimension of the state when learning states with an SRL model
    :param learn_states: (bool) whether the states should be learned (passed to the EpisodeSaver)
    :param verbose: (bool) Whether to print some debug info
    :param save_path: (str) location where the saved data should go
    :param env_rank: (int) the number ID of the environment
    :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model
    :param srl_model: (str) The SRL_model used
    """
    def __init__(self, urdf_root=pybullet_data.getDataPath(), renders=False, is_discrete=True, multi_view=False,
                 name="kuka_button_gym", max_distance=0.8, action_repeat=1, shape_reward=False, action_joints=False,
                 record_data=False, random_target=False, force_down=True, state_dim=-1, learn_states=False,
                 verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, srl_model="raw_pixels", **_):
        super(InmoovButtonGymEnv, self).__init__(srl_model=srl_model,
                                                 relative_pos=RELATIVE_POS,
                                                 env_rank=env_rank,
                                                 srl_pipe=srl_pipe)
        self._timestep = 1. / 240.
        # URDF PATH
        self._urdf_root = urdf_root
        self._urdf_sjtu = "../../urdf_robot/"

        self._action_repeat = action_repeat
        self._observation = []
        self._env_step_counter = 0
        self._renders = renders
        self._width = RENDER_WIDTH
        self._height = RENDER_HEIGHT

        self._cam_dist = 1.1
        self._cam_yaw = 145
        self._cam_pitch = -36
        self._cam_roll = 0

        self._max_distance = max_distance
        self._shape_reward = shape_reward
        self._random_target = random_target
        self._force_down = force_down

        self.camera_target_pos = (0.316, -0.2, -0.1)
        self._is_discrete = is_discrete
        self.terminated = False
        self.renderer = p.ER_TINY_RENDERER
        self.debug = True
        self.n_contacts = 0
        self.state_dim = state_dim
        self.action_joints = action_joints
        self.relative_pos = RELATIVE_POS
        self.cuda = th.cuda.is_available()
        self.saver = None
        self.multi_view = multi_view
        self.verbose = verbose
        self.max_steps = MAX_STEPS
        self.n_steps_outside = 0
        self.table_uid = None
        self.button_pos = None
        self.button_uid = None
        self._kuka = None
        self._inmoov = None
        self.action = None
        self.srl_model = srl_model

        if record_data:
            self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS,
                                      learn_states=learn_states, path=save_path)

        if self._renders:
            client_id = p.connect(p.SHARED_MEMORY)
            if client_id < 0:
                p.connect(p.GUI)
            p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])

            self.debug = True
            # Debug sliders for moving the camera
            self.x_slider = p.addUserDebugParameter("x_slider", -10, 10, self.camera_target_pos[0])
            self.y_slider = p.addUserDebugParameter("y_slider", -10, 10, self.camera_target_pos[1])
            self.z_slider = p.addUserDebugParameter("z_slider", -10, 10, self.camera_target_pos[2])
            self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10, self._cam_dist)
            self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180, self._cam_yaw)
            self.pitch_slider = p.addUserDebugParameter("cam_pitch", -180, 180, self._cam_pitch)

        else:
            p.connect(p.DIRECT)

        global CONNECTED_TO_SIMULATOR
        CONNECTED_TO_SIMULATOR = True

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)

        else:
            if self.action_joints:
                # 7 angles for the arm rotation, from -1 to 1
                #TODO:action_dim should be changed
                action_dim = 7
                self._action_bound = 1
            else:
                # 3 direction for the arm movement, from -1 to 1
                action_dim = 3
                self._action_bound = 1
            action_high = np.array([self._action_bound] * action_dim)
            self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()
        # elif self.srl_model == "joints":
        #     self.state_dim = self.getJointsDim()
        # elif self.srl_model == "joints_position":
        #     self.state_dim = self.getGroundTruthDim() + self.getJointsDim()

        # A gym environment must define its observation space
        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,),
                                                dtype=np.float32)

    def getSRLState(self, observation):
        state = []
        if self.srl_model in ["ground_truth"]:
            if self.relative_pos:
                state += list(self.getGroundTruth() - self.getTargetPos())
            else:
                state += list(self.getGroundTruth())
        if len(state) != 0:
            return np.array(state)
        else:
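            # Query the external SRL model: send (env_rank, observation) on
            # the input queue and block until the encoded state is returned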
            self.srl_pipe[0].put((self.env_rank, observation))
            return self.srl_pipe[1][self.env_rank].get()

    def getTargetPos(self):
        return self.button_pos

    @staticmethod
    def getJointsDim():
        """
        :return: (int)
        """
        return 6

    @staticmethod
    def getGroundTruthDim():
        return 3

    def getGroundTruth(self):
        return np.array(self.getArmPos())

    def getArmPos(self):
        """
        :return: ([float]) Position (x, y, z) of inmoov hand
        """
        return p.getLinkState(self._inmoov.inmoov_id, self._inmoov.effectorId)[0]

    def reset(self):
        self.terminated = False
        self.n_contacts = 0
        self.n_steps_outside = 0
        p.resetSimulation()
        p.setPhysicsEngineParameter(numSolverIterations=150)
        p.setTimeStep(self._timestep)
        p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"), [0, 0, -1])

        self.table_uid = p.loadURDF(os.path.join(self._urdf_root, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
                                    0.000000, 0.000000, 0.0, 1.0)

        # Initialize button position
        x_pos = 0.5
        y_pos = 0
        if self._random_target:
            x_pos += 0.15 * self.np_random.uniform(-1, 1)
            y_pos += 0.3 * self.np_random.uniform(-1, 1)

        self.button_uid = p.loadURDF("/urdf/simple_button.urdf", [x_pos, y_pos, Z_TABLE])
        self.button_pos = np.array([x_pos, y_pos, Z_TABLE])

        p.setGravity(0, 0, -10)
        self._inmoov = inmoov.Inmoov(urdf_path=self._urdf_sjtu)
        self._env_step_counter = 0
        # Close the gripper and wait for the arm to be in rest position
        for _ in range(500):
            if self.action_joints:
                self._inmoov.applyAction(list(np.array(self._inmoov.joint_positions)[:7]) + [0, 0])
            else:
                self._inmoov.applyAction([0, 0, 0, 0, 0])
            p.stepSimulation()

        # Randomize init arm pos: take 5 random actions
        for _ in range(N_RANDOM_ACTIONS_AT_INIT):
            if self._is_discrete:
                action = [0, 0, 0, 0, 0]
                sign = 1 if self.np_random.rand() > 0.5 else -1
                action_idx = self.np_random.randint(3)  # dx, dy or dz
                action[action_idx] += sign * DELTA_V
            else:
                if self.action_joints:
                    joints = np.array(self._inmoov.joint_positions)[:7]
                    joints += DELTA_THETA * self.np_random.normal(size=joints.shape)
                    action = list(joints) + [0, 0]
                else:
                    action = np.zeros(5)
                    rand_direction = self.np_random.normal(size=(3,))
                    # L2 normalize, so that the random direction is not too high or too low
                    rand_direction /= np.linalg.norm(rand_direction, 2)
                    action[:3] += DELTA_V_CONTINUOUS * rand_direction

            self._inmoov.applyAction(list(action))
            p.stepSimulation()

        self._observation = self.getExtendedObservation()

        self.button_pos = np.array(p.getLinkState(self.button_uid, BUTTON_LINK_IDX)[0])
        self.button_pos[2] += BUTTON_DISTANCE_HEIGHT  # Set the target position on the top of the button
        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(), self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return np.array(self._observation)

    def __del__(self):
        if CONNECTED_TO_SIMULATOR:
            p.disconnect()

    def getExtendedObservation(self):
        if getNChannels() > 3:
            self.multi_view = True
        self._observation = self.render("rgb_array")
        return self._observation

    def step(self, action):
        # if you choose to do nothing
        if action is None:
            if self.action_joints:
                return self.step2(list(np.array(self._inmoov.joint_positions)[:7]) + [0, 0])
            else:
                return self.step2([0, 0, 0, 0, 0])

        self.action = action  # For saver
        if self._is_discrete:
            dv = DELTA_V  # velocity per physics step.
            # Add noise to action
            dv += self.np_random.normal(0.0, scale=NOISE_STD)
            dx = [-dv, dv, 0, 0, 0, 0][action]
            dy = [0, 0, -dv, dv, 0, 0][action]
            if self._force_down:
                dz = [0, 0, 0, 0, -dv, -dv][action]  # Remove up action
            else:
                dz = [0, 0, 0, 0, -dv, dv][action]
            # da = [0, 0, 0, 0, 0, -0.1, 0.1][action]  # end effector angle
            finger_angle = 0.0  # Close the gripper
            # real_action = [dx, dy, -0.002, da, finger_angle]
            real_action = [dx, dy, dz, 0, finger_angle]
            print("Action: [dx, dy, dz, 0, finger_angle]: {}".format(real_action))
        else:
            if self.action_joints:
                arm_joints = np.array(self._inmoov.joint_positions)[:7]
                d_theta = DELTA_THETA
                # Add noise to action
                d_theta += self.np_random.normal(0.0, scale=NOISE_STD_JOINTS)
                # append [0,0] for finger angles
                real_action = list(action * d_theta + arm_joints) + [0, 0]  # TODO remove up action
            else:
                dv = DELTA_V_CONTINUOUS
                # Add noise to action
                dv += self.np_random.normal(0.0, scale=NOISE_STD_CONTINUOUS)
                dx = action[0] * dv
                dy = action[1] * dv
                if self._force_down:
                    dz = -abs(action[2] * dv)  # Remove up action
                else:
                    dz = action[2] * dv
                finger_angle = 0.0  # Close the gripper
                real_action = [dx, dy, dz, 0, finger_angle]

        if self.verbose:
            print(np.array2string(np.array(real_action), precision=2))

        return self.step2(real_action)

    def step2(self, action):
        """
        :param action: ([float])
        """
        # Apply force to the button
        p.setJointMotorControl2(self.button_uid, BUTTON_GLIDER_IDX, controlMode=p.POSITION_CONTROL, targetPosition=0.1)

        for i in range(self._action_repeat):
            self._inmoov.applyAction(action)
            p.stepSimulation()
            if self._termination():
                break
            self._env_step_counter += 1

        self._observation = self.getExtendedObservation()
        if self._renders:
            time.sleep(self._timestep)

        reward = self._reward()
        done = self._termination()
        if self.saver is not None:
            self.saver.step(self._observation, self.action, reward, done, self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation), reward, done, {}

        return np.array(self._observation), reward, done, {}

    def render(self, mode='human', close=False):
        if mode != "rgb_array":
            return np.array([])
        camera_target_pos = self.camera_target_pos

        if self.debug:
            self._cam_dist = p.readUserDebugParameter(self.dist_slider)
            self._cam_yaw = p.readUserDebugParameter(self.yaw_slider)
            self._cam_pitch = p.readUserDebugParameter(self.pitch_slider)
            x = p.readUserDebugParameter(self.x_slider)
            y = p.readUserDebugParameter(self.y_slider)
            z = p.readUserDebugParameter(self.z_slider)
            camera_target_pos = (x, y, z)
            # self._cam_roll = p.readUserDebugParameter(self.roll_slider)

        # TODO: recompute view_matrix and proj_matrix only in debug mode
        view_matrix1 = p.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=camera_target_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=self._cam_roll,
            upAxisIndex=2)
        proj_matrix1 = p.computeProjectionMatrixFOV(
            fov=60, aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
            nearVal=0.1, farVal=100.0)
        (_, _, px1, _, _) = p.getCameraImage(
            width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix1,
            projectionMatrix=proj_matrix1, renderer=self.renderer)
        rgb_array1 = np.array(px1)

        if self.multi_view:
            # adding a second camera on the other side of the robot
            view_matrix2 = p.computeViewMatrixFromYawPitchRoll(
                cameraTargetPosition=(0.316, 0.316, -0.105),
                distance=1.05,
                yaw=32,
                pitch=-13,
                roll=0,
                upAxisIndex=2)
            proj_matrix2 = p.computeProjectionMatrixFOV(
                fov=60, aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
                nearVal=0.1, farVal=100.0)
            (_, _, px2, _, _) = p.getCameraImage(
                width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix2,
                projectionMatrix=proj_matrix2, renderer=self.renderer)
            rgb_array2 = np.array(px2)
            rgb_array_res = np.concatenate((rgb_array1[:, :, :3], rgb_array2[:, :, :3]), axis=2)
        else:
            rgb_array_res = rgb_array1[:, :, :3]
        return rgb_array_res

    def _termination(self):
        if self.terminated or self._env_step_counter > self.max_steps:
            self._observation = self.getExtendedObservation()
            return True
        return False

    def _reward(self):
        gripper_pos = self.getArmPos()
        distance = np.linalg.norm(self.button_pos - gripper_pos, 2)
        # print(distance)

        contact_points = p.getContactPoints(self.button_uid, self._inmoov.inmoov_id, BUTTON_LINK_IDX)
        reward = int(len(contact_points) > 0)
        self.n_contacts += reward

        contact_with_table = len(p.getContactPoints(self.table_uid, self._inmoov.inmoov_id)) > 0

        if distance > self._max_distance or contact_with_table:
            reward = -1
            self.n_steps_outside += 1
        else:
            self.n_steps_outside = 0

        if contact_with_table or self.n_contacts >= N_CONTACTS_BEFORE_TERMINATION \
                or self.n_steps_outside >= N_STEPS_OUTSIDE_SAFETY_SPHERE:
            self.terminated = True

        if self._shape_reward:
            if self._is_discrete:
                return -distance
            else:
                # Button pushed
                if self.terminated and reward > 0:
                    return 50
                # out of bounds
                elif self.terminated and reward < 0:
                    return -250
                # anything else
                else:
                    return -distance

        return reward
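
# Minimal usage sketch (an assumption, not part of the original file): a short
# random rollout in PyBullet DIRECT mode. Requires the URDF assets
# (plane.urdf, table/table.urdf, /urdf/simple_button.urdf, the Inmoov model)
# and the module-level constants.
def _demo_inmoov_button(n_steps=5):
    env = InmoovButtonGymEnv(renders=False, is_discrete=True)
    obs = env.reset()
    for _ in range(n_steps):
        obs, reward, done, _ = env.step(env.action_space.sample())
        if done:
            break
    return obs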
Example #4
class OmniRobotEnv(SRLGymEnv):
    """
    OmniRobot robot Environment (Gym wrapper for OmniRobot environment)
    The goal of Omnirobot is to go to the location on the table
    (signaled with a circle sticker on the table)
    :param renders: (bool) Whether to display the GUI or not
    :param is_discrete: (bool) true if action space is discrete vs continuous
    :param save_path: (str) name of the folder where recorded data will be stored
    :param state_dim: (int) dimension of the state when learning states with an SRL model
    :param learn_states: (bool) whether the states should be learned (passed to the EpisodeSaver)
    :param record_data: (bool) Set to true, record frames with the rewards.
    :param shape_reward: (bool) Set to true, reward = -distance_to_goal
    :param env_rank: (int) the number ID of the environment
    :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model
    """
    def __init__(self,
                 renders=False,
                 name="Omnirobot",
                 is_discrete=True,
                 save_path='srl_zoo/data/',
                 state_dim=-1,
                 learn_states=False,
                 srl_model="raw_pixels",
                 record_data=False,
                 action_repeat=1,
                 random_target=True,
                 shape_reward=False,
                 simple_continual_target=False,
                 circular_continual_move=False,
                 square_continual_move=False,
                 eight_continual_move=False,
                 short_episodes=False,
                 state_init_override=None,
                 env_rank=0,
                 srl_pipe=None,
                 img_shape=None,
                 **_):

        super(OmniRobotEnv, self).__init__(srl_model=srl_model,
                                           relative_pos=RELATIVE_POS,
                                           env_rank=env_rank,
                                           srl_pipe=srl_pipe)
        if action_repeat != 1:
            raise NotImplementedError
        self.server_port = SERVER_PORT + env_rank
        self.n_contacts = 0
        use_ground_truth = srl_model == 'ground_truth'
        use_srl = srl_model != 'raw_pixels'
        self.use_srl = use_srl or use_ground_truth
        self.use_ground_truth = use_ground_truth
        self.use_joints = False
        self.relative_pos = RELATIVE_POS
        self._is_discrete = is_discrete
        self.observation = []
        # Start simulation with first observation
        self._env_step_counter = 0
        self.episode_terminated = False
        self.state_dim = state_dim
        if img_shape is None:
            self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH)
        else:
            self.img_shape = img_shape
        self._renders = renders
        self._shape_reward = shape_reward
        self.cuda = th.cuda.is_available()
        self.target_pos = None
        self.saver = None
        self._random_target = random_target
        self.simple_continual_target = simple_continual_target
        self.circular_continual_move = circular_continual_move
        self.square_continual_move = square_continual_move
        self.eight_continual_move = eight_continual_move
        self.short_episodes = short_episodes

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        else:
            action_dim = 2
            self.action_space = RingBox(positive_low=ACTION_POSITIVE_LOW,
                                        positive_high=ACTION_POSITIVE_HIGH,
                                        negative_low=ACTION_NEGATIVE_LOW,
                                        negative_high=ACTION_NEGATIVE_HIGH,
                                        shape=np.array([action_dim]),
                                        dtype=np.float32)
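            # RingBox is a project-specific action space; judging by its
            # positive/negative low/high bounds, it presumably keeps each
            # action's magnitude inside a ring, bounded away from zero
            # (an assumption: its definition is not shown here)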
        # SRL model
        if self.use_srl:
            if use_ground_truth:
                self.state_dim = self.getGroundTruthDim()
            self.dtype = np.float32
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=self.dtype)
        else:
            self.dtype = np.uint8
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.img_shape[1],
                                                       self.img_shape[2], 3),
                                                dtype=self.dtype)

        if record_data:
            print("Recording data...")
            self.saver = EpisodeSaver(name,
                                      0,
                                      self.state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=learn_states,
                                      path=save_path)

        if USING_OMNIROBOT_SIMULATOR:
            self.socket = OmniRobotSimulatorSocket(
                simple_continual_target=simple_continual_target,
                circular_continual_move=circular_continual_move,
                square_continual_move=square_continual_move,
                eight_continual_move=eight_continual_move,
                output_size=[self.img_shape[2], self.img_shape[1]],
                random_target=self._random_target,
                state_init_override=state_init_override)
        else:
            # Connect to the Gym bridge ROS node:
            self.context = zmq.Context()
            self.socket = self.context.socket(zmq.PAIR)
            self.socket.connect("tcp://{}:{}".format(HOSTNAME,
                                                     self.server_port))

            # note: if the connection takes too long, start the client first, then the server
            print("Waiting for server connection at port {}...".format(
                self.server_port))

            # block until the server sends its handshake message
            msg = self.socket.recv_json()
            print(
                "Connected to server on port {} (received message: {})".format(
                    self.server_port, msg))

        self.action = [0, 0]
        self.reward = 0
        self.robot_pos = np.array([0, 0])

        # Initialize the state
        if self._renders:
            self.image_plot = None

    def actionPolicyTowardTarget(self):
        """
        Scripted policy that moves the robot toward the target along the axis
        with the largest remaining error.
        :return: (int) discrete action, or (float) signed step for continuous actions
        """
        dx = self.target_pos[0] - self.robot_pos[0]
        dy = self.target_pos[1] - self.robot_pos[1]
        if abs(dx) > abs(dy):
            if self._is_discrete:
                return int(Move.FORWARD) if dx > 0 else int(Move.BACKWARD)
            return DELTA_POS if dx > 0 else -DELTA_POS
        if self._is_discrete:
            return int(Move.LEFT) if dy > 0 else int(Move.RIGHT)
        return DELTA_POS if dy > 0 else -DELTA_POS

    def step(self,
             action,
             generated_observation=None,
             action_proba=None,
             action_grid_walker=None):
        """

        :param :action: (int)
        :param generated_observation:
        :param action_proba:
        :param action_grid_walker: # Whether or not we want to override the action with the one from a grid walker
        :return: (tensor (np.ndarray)) observation, int reward, bool done, dict extras)
        """
        if action_grid_walker is None:
            action_to_step = action
            action_from_teacher = None
        else:
            action_to_step = action_grid_walker
            action_from_teacher = action
            assert self.action_space.contains(action_from_teacher)

        if not self._is_discrete:
            action_to_step = np.array(action_to_step)
        assert self.action_space.contains(action_to_step)

        # Serialize the action so it can be sent as JSON
        if isinstance(action_to_step, np.ndarray):
            self.action = action_to_step.tolist()
        elif hasattr(action_to_step, 'dtype'):
            # convert numpy scalar to python type
            self.action = action_to_step.item()
        else:
            self.action = action_to_step

        self._env_step_counter += 1

        # Send the action to the server
        self.socket.send_json({
            "command": "action",
            "action": self.action,
            "is_discrete": self._is_discrete,
            "step_counter": self._env_step_counter
        })

        # Receive state data (position, etc), important to update state related values
        self.getEnvState()

        #  Receive a camera image from the server
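        # A generated observation, if provided, is assumed to be normalized
        # to [0, 1], hence the rescaling by 255 below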
        self.observation = self.getObservation(
        ) if generated_observation is None else generated_observation * 255
        done = self._hasEpisodeTerminated()

        self.render()

        if self.saver is not None:
            self.saver.step(self.observation,
                            action_from_teacher if action_grid_walker
                            is not None else action_to_step,
                            self.reward,
                            done,
                            self.getGroundTruth(),
                            action_proba=action_proba)
        old_observation = self.getObservation()

        if self.use_srl:
            return self.getSRLState(
                self.observation if generated_observation is None else
                old_observation), self.reward, done, {}
        else:
            return self.observation, self.reward, done, {}

    def getEnvState(self):
        """
        Returns a dictionary containing info about the environment state.
        :return: (dict) state_data containing data related to the state: target_pos,
        robot_pos and reward.
        """
        state_data = self.socket.recv_json()
        self.reward = state_data["reward"]
        self.target_pos = np.array(state_data["target_pos"])
        self.robot_pos = np.array(state_data["position"])

        return state_data

    def getObservation(self):
        """
        Receive the observation image using a socket
        :return: (numpy ndarray) observation
        """
        # Receive a camera image from the server
        self.observation = recvMatrix(self.socket)
        # Resize it:
        self.observation = cv2.resize(self.observation,
                                      (self.img_shape[2], self.img_shape[1]),
                                      interpolation=cv2.INTER_AREA)
        return self.observation

    def getTargetPos(self):
        """
        :return (numpy array): Position of the target (circle sticker)
        """
        return self.target_pos

    @staticmethod
    def getGroundTruthDim():
        """
        :return: (int)
        """
        return 2

    def getGroundTruth(self):
        """
        Alias for getRobotPos for compatibility between envs
        :return: (numpy array)
        """
        return np.array(self.getRobotPos())

    def getRobotPos(self):
        """
        :return: (np.ndarray) Position (x, y) of the robot
        """
        return self.robot_pos

    def reset(self,
              aligned=False,
              init_with_real=False,
              generated_observation=None,
              state_override=None):
        """
        Reset the environment
        :param generated_observation:
        :param state_override:
        :return: (numpy ndarray) first observation of the env
        """
        self.episode_terminated = False
        # Step count since episode start
        self._env_step_counter = 0
        # set n contact count
        self.n_contacts = 0
        self.socket.send_json({
            "command": "reset",
            "step_counter": self._env_step_counter,
            "aligned": aligned
        })
        # Update state related variables, important step to get both data and
        # metadata that allow reading the observation image
        self.getEnvState()
        if not init_with_real:
            self.robot_pos = np.array(
                [0, 0]) if state_override is None else state_override
        self.observation = self.getObservation(
        ) if generated_observation is None else generated_observation * 255
        if self.saver is not None:
            self.saver.reset(self.observation, self.getTargetPos(),
                             self.getGroundTruth())
        old_observation = self.getObservation()

        if self.use_srl:
            return self.getSRLState(self.observation if generated_observation
                                    is None else old_observation)
        else:
            return self.observation

    def _hasEpisodeTerminated(self):
        """
        Returns True if the episode is over and False otherwise
        """
        if (self.episode_terminated or self._env_step_counter > MAX_STEPS) or \
                (self.n_contacts >= N_CONTACTS_BEFORE_TERMINATION and self.short_episodes and
                 self.simple_continual_target) or \
                (self._env_step_counter > MAX_STEPS_CIRCULAR_TASK_SHORT_EPISODES and self.short_episodes and
                 self.circular_continual_move):
            return True

        if np.abs(self.reward - REWARD_TARGET_REACH) < 1e-6:  # reached the target
            self.n_contacts += 1
        return False

    def closeServerConnection(self):
        """
        To be called at the end of running the program, externally
        """
        print("Omnirobot_env client exiting and closing socket...")
        self.socket.send_json({"command": "exit"})
        self.socket.close()

    def render(self, mode='rgb_array'):
        """
        :param mode: (str)
        :return: (numpy array) BGR image
        """
        if self._renders:
            if mode != "rgb_array":
                print('render in human mode not yet supported')
                return np.array([])

            plt.ion()  # needed for interactive update
            if self.image_plot is None:
                plt.figure('Omnirobot RL')
                self.initVisualizeBoundary()
                self.visualizeBoundary()
                self.image_plot = plt.imshow(self.observation_with_boundary,
                                             cmap='gray')
                self.image_plot.axes.grid(False)

            else:
                self.visualizeBoundary()
                self.image_plot.set_data(self.observation_with_boundary)
            plt.draw()
            # Wait a bit, so that plot is visible
            plt.pause(0.0001)
        return self.observation

    def initVisualizeBoundary(self):
        with open(CAMERA_INFO_PATH, 'r') as stream:
            try:
                contents = yaml.safe_load(stream)
                camera_matrix = np.array(
                    contents['camera_matrix']['data']).reshape((3, 3))
                distortion_coefficients = np.array(
                    contents['distortion_coefficients']['data']).reshape(
                        (1, 5))
            except yaml.YAMLError as exc:
                print(exc)

        # camera installation info
        r = R.from_euler('xyz', CAMERA_ROT_EULER_COORD_GROUND, degrees=True)
        camera_rot_mat_coord_ground = r.as_matrix()  # as_dcm() was renamed to as_matrix() in recent scipy

        pos_transformer = PosTransformer(camera_matrix,
                                         distortion_coefficients,
                                         CAMERA_POS_COORD_GROUND,
                                         camera_rot_mat_coord_ground)

        self.boundary_coner_pixel_pos = np.zeros((2, 4))
        # assume that image is undistorted
        self.boundary_coner_pixel_pos[:, 0] = \
            pos_transformer.phyPosGround2PixelPos([MIN_X, MIN_Y], return_distort_image_pos=False).squeeze()
        self.boundary_coner_pixel_pos[:, 1] = \
            pos_transformer.phyPosGround2PixelPos([MAX_X, MIN_Y], return_distort_image_pos=False).squeeze()
        self.boundary_coner_pixel_pos[:, 2] = \
            pos_transformer.phyPosGround2PixelPos([MAX_X, MAX_Y], return_distort_image_pos=False).squeeze()
        self.boundary_coner_pixel_pos[:, 3] = \
            pos_transformer.phyPosGround2PixelPos([MIN_X, MAX_Y], return_distort_image_pos=False).squeeze()

        # transform the corresponding points into cropped image
        self.boundary_coner_pixel_pos = self.boundary_coner_pixel_pos - (
            np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)).reshape(2, 1) / 2.0

        # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1])
        self.boundary_coner_pixel_pos[
            0, :] *= self.img_shape[2] / CROPPED_SIZE[0]
        self.boundary_coner_pixel_pos[
            1, :] *= self.img_shape[1] / CROPPED_SIZE[1]

        self.boundary_coner_pixel_pos = np.around(
            self.boundary_coner_pixel_pos).astype(int)  # np.int was removed from NumPy; use the builtin

        # Create square for vizu of objective in continual square task
        if self.square_continual_move:

            self.boundary_coner_pixel_pos_continual = np.zeros((2, 4))
            # assume that image is undistorted
            self.boundary_coner_pixel_pos_continual[:, 0] = \
                pos_transformer.phyPosGround2PixelPos([-RADIUS, -RADIUS], return_distort_image_pos=False).squeeze()
            self.boundary_coner_pixel_pos_continual[:, 1] = \
                pos_transformer.phyPosGround2PixelPos([RADIUS, -RADIUS],  return_distort_image_pos=False).squeeze()
            self.boundary_coner_pixel_pos_continual[:, 2] = \
                pos_transformer.phyPosGround2PixelPos([RADIUS, RADIUS], return_distort_image_pos=False).squeeze()
            self.boundary_coner_pixel_pos_continual[:, 3] = \
                pos_transformer.phyPosGround2PixelPos([-RADIUS, RADIUS], return_distort_image_pos=False).squeeze()

            # transform the corresponding points into cropped image
            self.boundary_coner_pixel_pos_continual = self.boundary_coner_pixel_pos_continual - (
                np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)).reshape(
                    2, 1) / 2.0

            # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1])
            self.boundary_coner_pixel_pos_continual[
                0, :] *= self.img_shape[2] / CROPPED_SIZE[0]
            self.boundary_coner_pixel_pos_continual[
                1, :] *= self.img_shape[1] / CROPPED_SIZE[1]

            self.boundary_coner_pixel_pos_continual = np.around(
                self.boundary_coner_pixel_pos_continual).astype(int)

        elif self.circular_continual_move:
            self.center_coordinates = \
                pos_transformer.phyPosGround2PixelPos([0, 0], return_distort_image_pos=False).squeeze()
            self.center_coordinates = self.center_coordinates - (
                np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)) / 2.0
            # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1])
            self.center_coordinates[0] *= self.img_shape[2] / CROPPED_SIZE[0]
            self.center_coordinates[1] *= self.img_shape[1] / CROPPED_SIZE[1]

            self.center_coordinates = np.around(
                self.center_coordinates).astype(int)

            # Point used to convert the radius into pixel space
            self.boundary_coner_pixel_pos_continual = \
                pos_transformer.phyPosGround2PixelPos([0, RADIUS], return_distort_image_pos=False).squeeze()

            # transform the corresponding points into cropped image
            self.boundary_coner_pixel_pos_continual = self.boundary_coner_pixel_pos_continual - (
                np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)) / 2.0

            # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1])
            self.boundary_coner_pixel_pos_continual[
                0] *= self.img_shape[2] / CROPPED_SIZE[0]
            self.boundary_coner_pixel_pos_continual[
                1] *= self.img_shape[1] / CROPPED_SIZE[1]

            self.boundary_coner_pixel_pos_continual = np.around(
                self.boundary_coner_pixel_pos_continual).astype(int)
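
    # A hedged refactoring sketch: the crop-then-resize mapping above is the
    # same for every set of points, so it could be factored into one helper
    # (hypothetical name, same math as the blocks above):
    #
    # def _groundToResizedPixel(self, pos_transformer, ground_pos):
    #     pix = pos_transformer.phyPosGround2PixelPos(
    #         ground_pos, return_distort_image_pos=False).squeeze()
    #     # center-crop offset, then rescale to the resized image
    #     pix = pix - (np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)) / 2.0
    #     pix[0] *= self.img_shape[2] / CROPPED_SIZE[0]
    #     pix[1] *= self.img_shape[1] / CROPPED_SIZE[1]
    #     return np.around(pix).astype(int)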

    def visualizeBoundary(self):
        """
        Visualize the invisible boundary (initVisualizeBoundary must be called first)
        """
        self.observation_with_boundary = self.observation.copy()

        # Add boundary continual
        if self.square_continual_move:
            for idx in range(4):
                idx_next = (idx + 1) % 4
                cv2.line(
                    self.observation_with_boundary,
                    tuple(self.boundary_coner_pixel_pos_continual[:, idx]),
                    tuple(self.boundary_coner_pixel_pos_continual[:, idx_next]),
                    (0, 0, 200), 1)

        elif self.circular_continual_move:
            radius_converted = np.linalg.norm(
                self.center_coordinates -
                self.boundary_coner_pixel_pos_continual)
            cv2.circle(self.observation_with_boundary,
                       tuple(self.center_coordinates),
                       int(round(radius_converted)), (0, 0, 200), 2)  # cv2.circle requires an integer radius

        # Add boundary of env
        for idx in range(4):
            idx_next = (idx + 1) % 4
            cv2.line(self.observation_with_boundary,
                     tuple(self.boundary_coner_pixel_pos[:, idx]),
                     tuple(self.boundary_coner_pixel_pos[:, idx_next]),
                     (200, 0, 0), 1)


class MobileRobotGymEnv(SRLGymEnv):
    """
    Gym wrapper for Mobile Robot environment
    WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded
    :param urdf_root: (str) Path to pybullet urdf files
    :param renders: (bool) Whether to display the GUI or not
    :param is_discrete: (bool) Whether to use discrete or continuous actions
    :param name: (str) name of the folder where recorded data will be stored
    :param max_distance: (float) Max distance between end effector and the button (for negative reward)
    :param shape_reward: (bool) Set to true, reward = -distance_to_goal
    :param record_data: (bool) Set to true, record frames with the rewards.
    :param random_target: (bool) Set the target to a random position
    :param state_dim: (int) dimension of the learned state (when using SRL)
    :param learn_states: (bool)
    :param verbose: (bool) Whether to print some debug info
    :param save_path: (str) location where the saved data should go
    :param env_rank: (int) the number ID of the environment
    :param pipe: (Queue, [Queue]) contains the input and output of the SRL model
    :param fpv: (bool) enable first person view camera
    :param srl_model: (str) The SRL_model used
    """
    def __init__(self,
                 urdf_root=pybullet_data.getDataPath(),
                 renders=False,
                 is_discrete=True,
                 name="mobile_robot",
                 max_distance=1.6,
                 shape_reward=False,
                 record_data=False,
                 srl_model="raw_pixels",
                 random_target=False,
                 force_down=True,
                 state_dim=-1,
                 learn_states=False,
                 verbose=False,
                 save_path='srl_zoo/data/',
                 env_rank=0,
                 srl_pipe=None,
                 fpv=False,
                 img_shape=None,
                 **_):
        super(MobileRobotGymEnv, self).__init__(srl_model=srl_model,
                                                relative_pos=RELATIVE_POS,
                                                env_rank=env_rank,
                                                srl_pipe=srl_pipe)
        self._timestep = 1. / 240.
        self._urdf_root = urdf_root
        self._observation = []
        self._env_step_counter = 0
        self._renders = renders
        self.img_shape = img_shape  # channel first !!
        if self.img_shape is None:
            self._width = RENDER_WIDTH
            self._height = RENDER_HEIGHT
        else:
            self._height, self._width = self.img_shape[1:]
        self._cam_dist = 4.4
        self._cam_yaw = 90
        self._cam_pitch = -90
        self._cam_roll = 0
        self._max_distance = max_distance
        self._shape_reward = shape_reward
        self._random_target = random_target
        self._force_down = force_down
        self.camera_target_pos = (2, 2, 0)
        self._is_discrete = is_discrete
        self.terminated = False
        self.renderer = p.ER_TINY_RENDERER
        self.debug = False
        self.n_contacts = 0
        self.state_dim = state_dim
        self.relative_pos = RELATIVE_POS
        self.cuda = th.cuda.is_available()
        self.saver = None
        self.verbose = verbose
        self.max_steps = MAX_STEPS
        self.robot_pos = np.zeros(3)
        self.robot_uid = None
        self.target_pos = np.zeros(3)
        self.target_uid = None
        # Boundaries of the square env
        self._min_x, self._max_x = 0, 4
        self._min_y, self._max_y = 0, 4
        self.has_bumped = False  # Used for handling collisions
        self.collision_margin = 0.1
        self.walls = None
        self.fpv = fpv
        self.srl_model = srl_model

        if record_data:
            self.saver = EpisodeSaver(name,
                                      max_distance,
                                      state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=learn_states,
                                      path=save_path)

        if self._renders:
            client_id = p.connect(p.SHARED_MEMORY)
            if client_id < 0:
                p.connect(p.GUI)
            p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])

            self.debug = True
            # Debug sliders for moving the camera
            self.x_slider = p.addUserDebugParameter("x_slider", -10, 10,
                                                    self.camera_target_pos[0])
            self.y_slider = p.addUserDebugParameter("y_slider", -10, 10,
                                                    self.camera_target_pos[1])
            self.z_slider = p.addUserDebugParameter("z_slider", -10, 10,
                                                    self.camera_target_pos[2])
            self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10,
                                                       self._cam_dist)
            self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180,
                                                      self._cam_yaw)
            self.pitch_slider = p.addUserDebugParameter(
                "cam_pitch", -180, 180, self._cam_pitch)

        else:
            p.connect(p.DIRECT)

        global CONNECTED_TO_SIMULATOR
        CONNECTED_TO_SIMULATOR = True

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        else:
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(2, ),
                                           dtype=np.float32)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()

        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self._height,
                                                       self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=np.float32)

    def getTargetPos(self):
        # Return only the [x, y] coordinates
        return self.target_pos[:2]

    def getGroundTruthDim(self):
        """
        The new convention for the ground truth (GT): under the random target
        setting, the GT should also include the target position.
        """
        # HACK: monkey-patched instead of changing srl_env.py; see getGroundTruth()
        if not self._random_target:
            return 2
        return 4

    def getRobotPos(self):
        # Return only the [x, y] coordinates
        return np.array(self.robot_pos)[:2]

    def getGroundTruth(self):
        """
        The new convention for the ground truth (GT): under the random target
        setting, the GT should also include the target position. A better
        solution would be to change all the environment files, especially srl_env.py.
        """
        # HACK: with relative_pos, or with a fixed target, the robot position
        # alone is the ground truth; otherwise the target position is appended.
        # Check 'envs.observation_space' in rl_baselines/base_classes.py
        # (before self.model.learn).
        robot_pos = self.getRobotPos()
        if self._random_target and not self.relative_pos:
            return np.concatenate([robot_pos, self.getTargetPos()], axis=0)
        return robot_pos
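
    # Worked example of the GT convention above, assuming relative_pos is False:
    #   random_target=False -> getGroundTruth() has dim 2: [robot_x, robot_y]
    #   random_target=True  -> dim 4: [robot_x, robot_y, target_x, target_y]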

    def reset(self):
        self.terminated = False
        p.resetSimulation()
        p.setPhysicsEngineParameter(numSolverIterations=150)
        p.setTimeStep(self._timestep)
        p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"), [0, 0, 0])
        p.setGravity(0, 0, -10)

        # Init the robot randomly
        x_start = self._max_x / 2 + self.np_random.uniform(
            -self._max_x / 3, self._max_x / 3)
        y_start = self._max_y / 2 + self.np_random.uniform(
            -self._max_y / 3, self._max_y / 3)
        self.robot_pos = np.array([x_start, y_start, 0])

        # Initialize target position
        x_pos = 0.9 * self._max_x
        y_pos = self._max_y * 3 / 4
        if self._random_target:
            margin = 0.1 * self._max_x
            x_pos = self.np_random.uniform(self._min_x + margin,
                                           self._max_x - margin)
            y_pos = self.np_random.uniform(self._min_y + margin,
                                           self._max_y - margin)

        self.target_uid = p.loadURDF("/urdf/cylinder.urdf", [x_pos, y_pos, 0],
                                     useFixedBase=True)
        self.target_pos = np.array([x_pos, y_pos, 0])

        # Add walls
        # Path to the urdf file
        wall_urdf = "/urdf/wall.urdf"
        # Rgba colors
        red, green, blue = [0.8, 0, 0, 1], [0, 0.8, 0, 1], [0, 0, 0.8, 1]

        wall_left = p.loadURDF(wall_urdf, [self._max_x / 2, 0, 0],
                               useFixedBase=True)
        # Change color
        p.changeVisualShape(wall_left, -1, rgbaColor=red)

        # getQuaternionFromEuler -> define orientation
        wall_bottom = p.loadURDF(wall_urdf, [self._max_x, self._max_y / 2, 0],
                                 p.getQuaternionFromEuler([0, 0, np.pi / 2]),
                                 useFixedBase=True)

        wall_right = p.loadURDF(wall_urdf, [self._max_x / 2, self._max_y, 0],
                                useFixedBase=True)
        p.changeVisualShape(wall_right, -1, rgbaColor=green)

        wall_top = p.loadURDF(wall_urdf, [self._min_x, self._max_y / 2, 0],
                              p.getQuaternionFromEuler([0, 0, np.pi / 2]),
                              useFixedBase=True)
        p.changeVisualShape(wall_top, -1, rgbaColor=blue)

        self.walls = [wall_left, wall_bottom, wall_right, wall_top]

        # Add mobile robot
        self.robot_uid = p.loadURDF(os.path.join(self._urdf_root,
                                                 "racecar/racecar.urdf"),
                                    self.robot_pos,
                                    useFixedBase=True)

        self._env_step_counter = 0
        for _ in range(50):
            p.stepSimulation()

        self._observation = self.getObservation()

        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(),
                             self.getRobotPos())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return np.array(self._observation)

    def __del__(self):
        if CONNECTED_TO_SIMULATOR:
            p.disconnect()

    def getObservation(self):
        """
        :return: (numpy array)
        """
        self._observation = self.render("rgb_array")
        return self._observation

    def step(self, action):
        # True if it has bumped against a wall
        self.has_bumped = False
        if self._is_discrete:
            dv = DELTA_POS
            # Add noise to action
            dv += self.np_random.normal(0.0, scale=NOISE_STD)
            dx = [-dv, dv, 0, 0][action]
            dy = [0, 0, -dv, dv][action]
            real_action = np.array([dx, dy])
        else:
            dv = DELTA_POS
            # Add noise to action
            dv += self.np_random.normal(0.0, scale=NOISE_STD)
            # scale action amplitude between -dv and dv
            real_action = np.clip(action, -1, 1) * dv
        if self.verbose:
            print(np.array2string(np.array(real_action), precision=2))

        previous_pos = self.robot_pos.copy()
        self.robot_pos[:2] += real_action
        # Handle collisions
        for i, (limit, robot_dim) in enumerate(
                zip([self._max_x, self._max_y], [ROBOT_LENGTH, ROBOT_WIDTH])):
            margin = self.collision_margin + robot_dim / 2
            # If it has bumped against a wall, stay at the previous position
            if self.robot_pos[i] < margin or self.robot_pos[i] > limit - margin:
                self.has_bumped = True
                self.robot_pos = previous_pos
                break
        # Update mobile robot position
        p.resetBasePositionAndOrientation(self.robot_uid, self.robot_pos,
                                          [0, 0, 0, 1])

        p.stepSimulation()
        self._env_step_counter += 1

        self._observation = self.getObservation()

        reward = self._reward()
        done = self._termination()
        if self.saver is not None:
            self.saver.step(self._observation, action, reward, done,
                            self.getRobotPos())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation), reward, done, {}

        return np.array(self._observation), reward, done, {}
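
    # Worked example of the collision margin in step(), assuming hypothetical
    # values ROBOT_LENGTH = 0.4 and collision_margin = 0.1: along x the robot
    # center must stay within [0.1 + 0.2, 4 - 0.1 - 0.2] = [0.3, 3.7]; any
    # step leaving this band is reverted and flagged with has_bumped = True.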

    def render(self, mode='human', close=False):
        if mode != "rgb_array":
            return np.array([])
        camera_target_pos = self.camera_target_pos

        if self.debug:
            self._cam_dist = p.readUserDebugParameter(self.dist_slider)
            self._cam_yaw = p.readUserDebugParameter(self.yaw_slider)
            self._cam_pitch = p.readUserDebugParameter(self.pitch_slider)
            x = p.readUserDebugParameter(self.x_slider)
            y = p.readUserDebugParameter(self.y_slider)
            z = p.readUserDebugParameter(self.z_slider)
            camera_target_pos = (x, y, z)

        # TODO: recompute view_matrix and proj_matrix only in debug mode
        view_matrix = p.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=camera_target_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=self._cam_roll,
            upAxisIndex=2)
        proj_matrix = p.computeProjectionMatrixFOV(fov=60,
                                                   aspect=float(self._width) /
                                                   self._height,
                                                   nearVal=0.1,
                                                   farVal=100.0)
        (_, _, px1, _, _) = p.getCameraImage(width=self._width,
                                             height=self._height,
                                             viewMatrix=view_matrix,
                                             projectionMatrix=proj_matrix,
                                             renderer=self.renderer)
        rgb_array = np.array(px1)

        rgb_array_res = rgb_array[:, :, :3]

        # If first person view, stack the observation from the car camera
        if self.fpv:
            # move camera
            view_matrix = p.computeViewMatrixFromYawPitchRoll(
                cameraTargetPosition=(self.robot_pos[0] - 0.25,
                                      self.robot_pos[1], 0.15),
                distance=0.3,
                yaw=self._cam_yaw,
                pitch=-17,
                roll=self._cam_roll,
                upAxisIndex=2)
            proj_matrix = p.computeProjectionMatrixFOV(
                fov=90,
                aspect=float(self._width) / self._height,
                nearVal=0.1,
                farVal=100.0)
            # get and stack image
            (_, _, px1, _, _) = p.getCameraImage(width=self._width,
                                                 height=self._height,
                                                 viewMatrix=view_matrix,
                                                 projectionMatrix=proj_matrix,
                                                 renderer=self.renderer)
            rgb_array = np.array(px1)
            rgb_array_res = np.dstack([rgb_array_res, rgb_array[:, :, :3]])

        return rgb_array_res

    def _termination(self):
        """
        :return: (bool)
        """
        if self.terminated or self._env_step_counter > self.max_steps:
            self._observation = self.getObservation()
            return True
        return False

    def _reward(self):
        """
        :return: (float)
        """
        # Distance to target
        distance = np.linalg.norm(self.getTargetPos() - self.robot_pos[:2], 2)
        reward = 0

        if distance <= REWARD_DIST_THRESHOLD:
            reward = 1
            # self.terminated = True

        # Negative reward when it bumps into a wall
        if self.has_bumped:
            reward = -1

        if self._shape_reward:
            return -distance
        return reward
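
# Usage sketch for MobileRobotGymEnv: a minimal random-agent loop (assumes
# pybullet is available, the module-level constants are defined, and seed()
# is the gym-style seeding inherited from SRLGymEnv):
#
# env = MobileRobotGymEnv(renders=False, is_discrete=True, srl_model="raw_pixels")
# env.seed(0)
# obs = env.reset()
# done = False
# while not done:
#     obs, reward, done, _ = env.step(env.action_space.sample())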


class LabyrinthEnv3(SRLGymEnv):
    """
    This Labyrinth environment can run at around 36,000 FPS on 10 CPUs (Intel i9-9900K)

    Labyrinth-v3:
                adds two palm trees as obstacles (can be destroyed)
                adds two oak barrels as obstacles (can be pushed, and destroyed against a wall)

    :param name: (str) name of the folder where recorded data will be stored
    :param max_distance: (float) Max distance between end effector and the button (for negative reward)

    :param record_data: (bool) Set to true, record frames with the rewards.
    :param random_target: (bool) Set the target to a random position
    :param state_dim: (int) When learning states

    :param verbose: (bool) Whether to print some debug info
    :param save_path: (str) location where the saved data should go
    :param env_rank: (int) the number ID of the environment
    :param pipe: (Queue, [Queue]) contains the input and output of the SRL model

    :param srl_model: (str) The SRL_model used
    """
    def __init__(self,
                 renders=False,
                 is_discrete=True,
                 name="labyrinth",
                 max_distance=1.6,
                 shape_reward=False,
                 record_data=False,
                 srl_model="raw_pixels",
                 random_target=False,
                 state_dim=-1,
                 verbose=False,
                 save_path='srl_zoo/data/',
                 env_rank=0,
                 srl_pipe=None,
                 img_shape=None,
                 **_):
        super(LabyrinthEnv3, self).__init__(srl_model=srl_model,
                                            relative_pos=False,
                                            env_rank=env_rank,
                                            srl_pipe=srl_pipe)
        self._observation = []
        self._renders = renders
        self.img_shape = img_shape  # channel first !!
        if self.img_shape is None:
            self._width = RENDER_WIDTH
            self._height = RENDER_HEIGHT
        else:
            self._height, self._width = self.img_shape[1:]
        # self._shape_reward = shape_reward
        self._random_target = random_target
        self.state_dim = state_dim
        self.saver = None
        self.verbose = verbose
        self.max_steps = MAX_STEPS
        self.robot_pos = np.zeros(2, dtype=int)  # index on the self.map array

        self.target_pos = []  # list of indexes (target positions) on the self.map array
        self.previous_robot_pos = None
        self.has_bumped = False  # Used for handling collisions
        self.count_collected_tresors = 0
        self._env_step_counter = 0
        self.srl_model = srl_model
        self.maze_size = MAZE_SIZE
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        if record_data:
            self.saver = EpisodeSaver(name,
                                      max_distance,
                                      state_dim,
                                      globals_=getGlobals(),
                                      path=save_path)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()

        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self._height,
                                                       self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=np.float32)

    def create_map(self):
        """
        -1 is wall, 0 is free space, 1 is key(tresor), 2 is robot, 3 is obstacle (Palm tree) 
        4 is oak
        """
        # Create map without robot
        # put walls
        self.map = np.zeros((self.maze_size, self.maze_size))
        self.map[:, 0] = -1
        self.map[:, -1] = -1
        self.map[0, :] = -1
        self.map[-1, :] = -1
        self.map[:3, 4] = -1
        self.map[-3:, 4] = -1
        # put treasures (targets)
        self.target_pos = []
        self.obstacle_pos = []
        self.oak_pos = []
        if not self._random_target:
            self.target_pos.append(np.array([1, -2], dtype=int))
            self.target_pos.append(np.array([-2, 1], dtype=int))
            self.obstacle_pos.append(np.array([2, 2], dtype=int))
        else:
            valid_target_pos_list = []
            for i in range(self.maze_size):
                for j in range(self.maze_size):
                    if self.map[i, j] == 0:
                        valid_target_pos_list.append(
                            np.array([i, j], dtype=int))
            obstacle_ind = self.np_random.choice(np.arange(
                len(valid_target_pos_list)),
                                                 2,
                                                 replace=False)
            for index in sorted(obstacle_ind, reverse=True):
                self.obstacle_pos.append(valid_target_pos_list.pop(index))

            oak_ind = self.np_random.choice(np.arange(
                len(valid_target_pos_list)),
                                            2,
                                            replace=False)
            for index in sorted(oak_ind, reverse=True):
                self.oak_pos.append(valid_target_pos_list.pop(index))

            for index in self.np_random.choice(
                    np.arange(len(valid_target_pos_list)), 2,
                    replace=False):  # randomly choose two targets
                self.target_pos.append(valid_target_pos_list[index])

        for key_pos in self.target_pos:
            self.map[key_pos[0], key_pos[1]] = 1
        for key_pos in self.obstacle_pos:
            self.map[key_pos[0], key_pos[1]] = 3
        for key_pos in self.oak_pos:
            self.map[key_pos[0], key_pos[1]] = 4

        valid_robot_pos_list = []
        for i in range(self.maze_size):
            for j in range(self.maze_size):
                if self.map[i, j] == 0:
                    valid_robot_pos_list.append(np.array([i, j], dtype=int))

        # load images
        target_img_ori = cv2.imread("./environments/labyrinth/tresors_128.png")
        robot_img_ori = cv2.imread("./environments/labyrinth/corsair_128.png")
        palm_img_ori = cv2.imread("./environments/labyrinth/palm_128.jpg")
        oak_img_ori = cv2.imread("./environments/labyrinth/oak_128.jpg")
        map_h, map_w = self.map.shape
        self.square_size = int(self._height / map_h)
        self.target_img = cv2.resize(
            target_img_ori, (self.square_size, self.square_size))[..., ::-1]
        self.robot_img = cv2.resize(
            robot_img_ori, (self.square_size, self.square_size))[..., ::-1]
        self.palm_img = cv2.resize(
            palm_img_ori, (self.square_size, self.square_size))[..., ::-1]
        self.oak_img = cv2.resize(
            oak_img_ori, (self.square_size, self.square_size))[..., ::-1]

        return valid_robot_pos_list

    def getGroundTruthDim(self):
        """
        The new convention for the ground truth (GT): GT should include target position under random target
        setting. 
        """
        return self.maze_size**2

    def getGroundTruth(self):
        return self.map.ravel()

    def reset(self):
        self.count_collected_tresors = 0
        self._env_step_counter = 0
        self.has_bumped = False
        self.previous_robot_pos = None
        self._observation = None
        valid_robot_pos_list = self.create_map()
        # put robot
        self.robot_pos = valid_robot_pos_list[self.np_random.choice(
            np.arange(len(valid_robot_pos_list)))]
        self.map[self.robot_pos[0], self.robot_pos[1]] = 2

        self._observation = self.getObservation(start=True)
        if self.saver is not None:
            self.saver.reset(self._observation, np.zeros(2), np.zeros(2))
        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return self._observation

    def getObservation(self, start=False):
        """
        :return: (numpy array)
        """
        self._observation = self.render("rgb_array", start=start)
        return self._observation

    def valid_action(self, action):
        """
        The most important function. It's define the rule of Labyrinth
        check whether an action is valid
        return (bool)
        """
        if action == 0:
            real_action = np.array([1, 0])
        elif action == 1:
            real_action = np.array([0, 1])
        elif action == 2:
            real_action = np.array([-1, 0])
        elif action == 3:
            real_action = np.array([0, -1])
        else:
            raise NotImplementedError

        next_pos = self.robot_pos + real_action
        valid = np.all(next_pos >= 0) and np.all(
            next_pos < self.maze_size) and (self.map[next_pos[0], next_pos[1]]
                                            != -1)
        return valid, next_pos, real_action
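
    # Worked example: with robot_pos = [1, 1], action = 0 gives real_action
    # [1, 0] and next_pos = [2, 1]; the move is valid iff next_pos lies inside
    # the maze and self.map[2, 1] != -1 (i.e. not a wall).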

    def step(self, action):
        # action: 0 = down, 1 = right, 2 = up, 3 = left (row/col moves on self.map)
        # True if it has bumped against a wall
        self._env_step_counter += 1

        self.has_bumped = False
        previous_pos = self.robot_pos.copy()
        self.previous_robot_pos = previous_pos
        valid, next_pos, real_action = self.valid_action(action)
        self.has_bumped = not valid

        if self.has_bumped:
            reward = -1
        elif (self.map[next_pos[0], next_pos[1]] == 1):
            self.count_collected_tresors += 1
            reward = 10
        elif (self.map[next_pos[0], next_pos[1]] == 4):
            # Push the oak barrel one square further
            next_next_pos = next_pos + real_action
            if self.map[next_next_pos[0], next_next_pos[1]] == -1:
                # pushed against a wall: the barrel is destroyed
                pass
            else:
                self.map[next_next_pos[0], next_next_pos[1]] = 4
            reward = -0.1
        else:
            reward = -0.1
        done = (self.count_collected_tresors == len(
            self.target_pos)) or (self._env_step_counter > self.max_steps)

        if self.has_bumped:
            # no need to update observation
            pass
        else:
            self.map[previous_pos[0], previous_pos[1]] = 0  # free space
            # update robot position and the map
            self.robot_pos = next_pos
            self.map[self.robot_pos[0], self.robot_pos[1]] = 2
            # update observation
            self._observation = self.getObservation()

        if self.saver is not None:
            # HACK/TODO: the reward is discretized here for the estimation of
            # GTC (ground truth correlation)
            if reward >= 1:
                discrete_reward = 1
            elif reward <= -1:
                discrete_reward = -1
            else:
                discrete_reward = 0
            self.saver.step(self._observation, action, discrete_reward, done,
                            np.zeros(2))
        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation), reward, done, {}

        return self._observation, reward, done, {}

    def render(self, mode='rgb_array', start=False):
        # [OPT] this part could be optimized further if needed, but it is not
        # necessary: 36,000 FPS is already ~5 times faster than the 7,000 FPS
        # of PPO2 (with 10 CPUs).
        if self._observation is None or (isinstance(self._observation, list)
                                         and len(
                                             self._observation) == 0) or start:
            previous_obs = 255 * np.ones(
                (self._height, self._width, 3), dtype=np.uint8)
        else:
            previous_obs = self._observation

        map_h, map_w = self.map.shape
        square_size = self.square_size
        for i in range(map_h):
            for j in range(map_w):
                if self.map[i, j] == -1:
                    previous_obs[i * square_size:(i + 1) * square_size,
                                 j * square_size:(j + 1) * square_size, :] = 0
                elif self.map[i, j] == 1:
                    previous_obs[i * square_size:(i + 1) * square_size,
                                 j * square_size:(j + 1) *
                                 square_size, :] = self.target_img
                elif self.map[i, j] == 2:
                    previous_obs[i * square_size:(i + 1) * square_size,
                                 j * square_size:(j + 1) *
                                 square_size, :] = self.robot_img
                elif self.map[i, j] == 3:
                    previous_obs[i * square_size:(i + 1) * square_size,
                                 j * square_size:(j + 1) *
                                 square_size, :] = self.palm_img
                elif self.map[i, j] == 4:
                    previous_obs[i * square_size:(i + 1) * square_size,
                                 j * square_size:(j + 1) *
                                 square_size, :] = self.oak_img
                elif self.map[i, j] == 0:
                    # Repaint the square the robot has just left
                    if self.previous_robot_pos is not None and i == self.previous_robot_pos[
                            0] and j == self.previous_robot_pos[1]:
                        previous_obs[i * square_size:(i + 1) * square_size,
                                     j * square_size:(j + 1) *
                                     square_size, :] = 255
                else:
                    raise NotImplementedError
        if self._renders:
            cv2.imshow(
                "Keep pressing (any key) to display or 'q'/'Esc' to quit.",
                previous_obs[..., ::-1])
            key = cv2.waitKey(0)
            if key == ord('q') or key == 27:  # 'q' or 'Esc'
                cv2.destroyAllWindows()
                raise KeyboardInterrupt
        return previous_obs

    def interactive(self, show_map=False):
        image = self.getObservation(start=True)
        cv2.imshow("image", image[..., ::-1])
        # Numpad 2/6/8/4 (or the arrow-key codes) map to down/right/up/left
        key_to_action = {ord("2"): 0, 84: 0,   # down
                         ord("6"): 1, 83: 1,   # right
                         ord("8"): 2, 82: 2,   # up
                         ord("4"): 3, 81: 3}   # left
        while True:
            k = cv2.waitKey(0)
            if k == 27 or k == ord("q"):  # press 'Esc' or 'q' to quit
                break
            elif k in key_to_action:
                action = key_to_action[k]
                image, reward, done, _ = self.step(action)
                print("Action: {}, Reward: {}, Done: {}".format(
                    action, reward, done))
                if show_map:
                    print(self.map)
                cv2.imshow("image", image[..., ::-1])
            else:
                print("You are pressing the key: {}".format(k))
Example #7
class CarRacingEnv(GymCarRacing, SRLGymEnv):
    def __init__(self,
                 name="car_racing",
                 renders=False,
                 record_data=False,
                 is_discrete=True,
                 state_dim=-1,
                 learn_states=False,
                 save_path='srl_zoo/data/',
                 srl_model="raw_pixels",
                 env_rank=0,
                 srl_pipe=None,
                 lookahead=5,
                 img_shape=None,
                 **_):
        """
        Gym wrapper for Racing car environment
        WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded


        :param name: (str) name of the folder where recorded data will be stored
        :param renders: (bool) Whether to display the GUI or not
        :param record_data: (bool) Set to true, record frames with the rewards.
        :param is_discrete: (bool) Whether to use discrete or continuous actions
        :param state_dim: (int) dimension of the learned state (when using SRL)
        :param learn_states: (bool)
        :param save_path: (str) location where the saved data should go
        :param srl_model: (str) The SRL_model used
        :param env_rank: (int) the number ID of the environment
        :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model
        :param lookahead: (int) How many segments ahead of the current position of the track should the target be
        :param img_shape: (tuple) channel-first image shape, None to use the default render size
        """
        SRLGymEnv.__init__(self,
                           srl_model=srl_model,
                           relative_pos=RELATIVE_POS,
                           env_rank=env_rank,
                           srl_pipe=srl_pipe,
                           img_shape=None)
        GymCarRacing.__init__(self)
        self._renders = renders
        self.img_shape = img_shape
        if self.img_shape is None:
            self._width = RENDER_WIDTH
            self._height = RENDER_HEIGHT
        else:
            self._height, self._width = self.img_shape[1:]
        self._is_discrete = is_discrete
        self.lookahead = lookahead
        self.relative_pos = RELATIVE_POS
        self._env_step_counter = 0
        self._observation = None
        self.saver = None

        if record_data:
            self.saver = EpisodeSaver(name,
                                      None,
                                      state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=learn_states,
                                      path=save_path)

        # Steer left, steer right, gas, brake (matching the discrete mapping in step())
        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        else:
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(3, ),
                                           dtype=np.float32)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()

        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self._height,
                                                       self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=np.float32)

    def getTargetPos(self):
        # Find the track segment nearest to the car's current state, then return
        # the segment `lookahead` steps ahead of it (x, y padded with zeros to
        # match the 5-dim ground truth)
        ground_truth = self.getGroundTruth()
        nearest_idx = np.argmin([
            np.sqrt(np.sum((np.array(list(a[2:4]) + [0, 0, 0]) - ground_truth)**2))
            for a in self.track
        ])
        return np.array(
            list(self.track[(nearest_idx + self.lookahead) %
                            len(self.track)][2:4]) + [0, 0, 0])

    @staticmethod
    def getGroundTruthDim():
        return 5
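
    # The 5 ground-truth dimensions are: hull x, hull y, angle, inertia and
    # angular velocity (see getGroundTruth below)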

    def getGroundTruth(self):
        # the car's current x, y position, angle, inertia and angular velocity
        hull = self.car.hull
        return np.array(
            list(hull.position) +
            [hull.angle, hull.inertia, hull.angularVelocity])

    def getObservation(self):
        """
        :return: (numpy array)
        """
        self._observation = self.render("rgb_array")
        self._observation = cv2.resize(self._observation,
                                       (self._width, self._height))
        return self._observation

    def reset(self):
        super().reset()
        self._env_step_counter = 0
        self.getObservation()

        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(),
                             self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return self._observation

    def step(self, action):
        if action is not None:
            if self._is_discrete:
                self.car.steer([-1, 1, 0, 0][action])
                self.car.gas([0, 0, 1, 0][action])
                self.car.brake([0, 0, 0, 1][action])
            else:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.getObservation()

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            self._env_step_counter += 1
            # We don't actually want to count fuel spent; we want the car to be faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(
                    self.track) or self._env_step_counter >= MAX_STEPS:
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        if self.saver is not None:
            self.saver.step(self._observation, action, step_reward, done,
                            self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation), step_reward, done, {}

        return np.array(self._observation), step_reward, done, {}

    # Copied from the original Gym CarRacing env; modified to be able to hide the render window.
    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.viewer.window.set_visible(self._renders)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            win.clear()
            t = self.transform
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(
                WINDOW_W, WINDOW_H)  # TODO: find out why the 2x factor is needed
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()  # np.fromstring is deprecated
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        # agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []

        return arr
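
# Usage sketch for CarRacingEnv: a short random-agent rollout (assumes Box2D,
# pyglet and the gym CarRacing assets are available):
#
# env = CarRacingEnv(renders=False, is_discrete=True)
# obs = env.reset()
# for _ in range(100):
#     obs, reward, done, _ = env.step(env.action_space.sample())
#     if done:
#         break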
Example #8
class RLBenchDSEnv:
    """
    wrapped_env is the RLBench env, passed in already initialized. This class just forwards actions to that env.
    """
    def __init__(self,
                 name="RLBenchDS-unset",
                 wrapped_env=None,
                 max_steps_per_epoch=100,
                 path=None,
                 **_kwargs):
        # Passed in via dataset_generator.py kwargs.
        self.env = wrapped_env
        self.max_steps_per_epoch = max_steps_per_epoch
        print("Recording data...")
        try:
            env_name = self.env.task.get_name()
        except AttributeError:
            # Fall back to the gym env id when there is no RLBench task
            env_name = self.env.unwrapped.spec.id
        print(env_name)
        self.saver = EpisodeSaver(name, env_name=env_name, path=path)

        self.action_space = self.env.action_space

    def seed(self, seed=None):
        """
        Seed random generator
        :param seed: (int)
        :return: ([int])
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action, t=0):
        """
        :action: (int)
        :return: (observation, int reward, bool done, dict extras)
        """
        self.action = action

        self.observation, reward, done, _ = self.env.step(action)
        ground_truth, obs = self.observation['state'], self.observation[
            'observation']
        done = done or t == self.max_steps_per_epoch
        self.saver.step(obs, action, reward, done, ground_truth)
        return self.observation, reward, done, {}

    @staticmethod
    def getGroundTruthDim():
        """
        :return: (int)
        """
        return None

    def getGroundTruth(self):
        """
        Don't need this method 
        :return: (numpy array)
        """
        return None

    def reset(self):
        """
        Reset the environment
        :return: (numpy ndarray) first observation of the env
        """
        self.observation = self.env.reset()
        ground_truth, obs = self.observation['state'], self.observation[
            'observation']
        if self.saver is not None:
            self.saver.reset(obs, ground_truth)
        return self.observation

    def render(self, mode='rgb_array'):
        """
        :param mode: (str)
        :return: (numpy array) BGR image
        """
        return None
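
# Usage sketch for RLBenchDSEnv (assumes `rlbench_env` is an already-initialized
# RLBench gym env whose observations are dicts with 'state' and 'observation'
# keys; the name below is hypothetical):
#
# env = RLBenchDSEnv(name="RLBenchDS-reach", wrapped_env=rlbench_env,
#                    max_steps_per_epoch=100, path="srl_zoo/data/")
# obs = env.reset()
# for t in range(env.max_steps_per_epoch):
#     obs, reward, done, _ = env.step(env.action_space.sample(), t=t)
#     if done:
#         break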