def __init__(self, name="car_racing", renders=False, record_data=False, is_discrete=True, state_dim=-1, learn_states=False, save_path='srl_zoo/data/', srl_model="raw_pixels", env_rank=0, srl_pipe=None, lookahead=5, **_): """ Gym wrapper for Racing car environment WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded :param name: (str) name of the folder where recorded data will be stored :param renders: (bool) Whether to display the GUI or not :param record_data: (bool) Set to true, record frames with the rewards. :param is_discrete: (bool) Whether to use discrete or continuous actions :param state_dim: (int) When learning states :param learn_states: (bool) :param save_path: (str) location where the saved data should go :param srl_model: (str) The SRL_model used :param env_rank: (int) the number ID of the environment :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model :param lookahead: (int) How many segments ahead of the current position of the track should the target be """ SRLGymEnv.__init__(self, srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) GymCarRacing.__init__(self) self._renders = renders self._width = RENDER_WIDTH self._height = RENDER_HEIGHT self._is_discrete = is_discrete self.lookahead = lookahead self.relative_pos = RELATIVE_POS self._env_step_counter = 0 self._observation = None self.saver = None if record_data: self.saver = EpisodeSaver(name, None, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) # Accelerate, brake, stear left, stear right if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
def __init__(self, name="RLBenchDS-unset", wrapped_env=None, max_steps_per_epoch=100, path=None, **_kwargs): #Passed in in dataset_generator.py kwargs. self.env = wrapped_env self.max_steps_per_epoch = max_steps_per_epoch print("Recording data...") try: env_name = self.env.task.get_name() except: env_name = self.env.unwrapped.spec.id print(env_name) self.saver = EpisodeSaver(name, env_name=env_name, path=path) self.action_space = self.env.action_space
def __init__(self, renders=False, is_discrete=True, name="labyrinth", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, state_dim=-1, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, img_shape=None, **_): super(LabyrinthEnv1, self).__init__(srl_model=srl_model, relative_pos=False, env_rank=env_rank, srl_pipe=srl_pipe) self._observation = [] self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] # self._shape_reward = shape_reward self._random_target = random_target self.state_dim = state_dim self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(2, dtype=int) # index on the self.map array self.target_pos = [] # list of indexes (target positions) on the self.map array self.previous_robot_pos = None self.has_bumped = False # Used for handling collisions self.count_collected_tresors = 0 self._env_step_counter = 0 self.srl_model = srl_model self.maze_size = MAZE_SIZE self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), path=save_path) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
def __init__(self, renders=False, is_discrete=True, name="mobile_robot", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, state_dim=-1, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, img_shape=None, **_): super(MobileRobotX, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._is_discrete = is_discrete self.state_dim = state_dim self.relative_pos = RELATIVE_POS self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(2) self.target_pos = np.zeros(2) # Boundaries of the square env self._min_x, self._max_x = 0, 4 self._min_y, self._max_y = 0, 4 self.collision_margin = 0.1 self._observation = [] self._env_step_counter = 0 self.has_bumped = False # Used for handling collisions self.imgs_folder = "mobilerobot" ## 'mobilerobot', 'omnirobot' self.robot_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/robot_224.png". format(self.imgs_folder)) self.target_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/target_224.png". format(self.imgs_folder)) self.background_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/background_224.png". format(self.imgs_folder)) robot_img_shape = self.robot_img.shape self.robot_img = cv2.resize( self.robot_img, (int(robot_img_shape[1] * self._width / 224), int(robot_img_shape[0] * self._height / 224))) target_img_shape = self.target_img.shape self.target_img = cv2.resize( self.target_img, (int(target_img_shape[1] * self._width / 224), int(target_img_shape[0] * self._height / 224))) self.background_img = cv2.resize(self.background_img, (self._width, self._height)) # cv2.imshow("robot", self.robot_img) # k = cv2.waitKey(0) # if k == ord("q"): # raise KeyboardInterrupt # self.walls = None self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=False, path=save_path) if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32)
class MobileRobotX(SRLGymEnv): """ """ def __init__(self, renders=False, is_discrete=True, name="mobile_robot", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, state_dim=-1, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, img_shape=None, **_): super(MobileRobotX, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._is_discrete = is_discrete self.state_dim = state_dim self.relative_pos = RELATIVE_POS self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(2) self.target_pos = np.zeros(2) # Boundaries of the square env self._min_x, self._max_x = 0, 4 self._min_y, self._max_y = 0, 4 self.collision_margin = 0.1 self._observation = [] self._env_step_counter = 0 self.has_bumped = False # Used for handling collisions self.imgs_folder = "mobilerobot" ## 'mobilerobot', 'omnirobot' self.robot_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/robot_224.png". format(self.imgs_folder)) self.target_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/target_224.png". format(self.imgs_folder)) self.background_img = cv2.imread( "./environments/mobile_robot_extreme/images/{}/background_224.png". format(self.imgs_folder)) robot_img_shape = self.robot_img.shape self.robot_img = cv2.resize( self.robot_img, (int(robot_img_shape[1] * self._width / 224), int(robot_img_shape[0] * self._height / 224))) target_img_shape = self.target_img.shape self.target_img = cv2.resize( self.target_img, (int(target_img_shape[1] * self._width / 224), int(target_img_shape[0] * self._height / 224))) self.background_img = cv2.resize(self.background_img, (self._width, self._height)) # cv2.imshow("robot", self.robot_img) # k = cv2.waitKey(0) # if k == ord("q"): # raise KeyboardInterrupt # self.walls = None self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=False, path=save_path) if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32) def getTargetPos(self): # Return only the [x, y] coordinates return self.target_pos # @staticmethod def getGroundTruthDim(self): """ The new convention for the ground truth (GT): GT should include target position under random target setting. """ # HACK : Monkey-Patch, shit solution to solve problem. if not self._random_target: return 2 else: return 4 def getRobotPos(self): # Return only the [x, y] coordinates return self.robot_pos def getGroundTruth(self): """ The new convention for the ground truth (GT): GT should include target position under random target setting. A better solution would be "change all the environment files, especially srl_env.py" !!! """ # HACK: Monkey-Patch, shit solution to solve problem. robot_pos = self.getRobotPos() if self._random_target: if self.relative_pos: # HACK here! Change srl_env.py and all the other envs in the future !!! TODO TODO TODO return robot_pos else: # check 'envs.observation_space' in rl_baselines/base_classes.py (before self.model.learn) !!! target_pos = self.getTargetPos() return np.concatenate([robot_pos, target_pos], axis=0) else: if self.relative_pos: # HACK here! Change srl_env.py and all the other envs in the future !!! TODO TODO TODO return robot_pos else: return robot_pos def reset(self): self._env_step_counter = 0 self.has_bumped = False # Init the robot randomly x_start = self._max_x / 2 + self.np_random.uniform( -self._max_x / 3, self._max_x / 3) y_start = self._max_y / 2 + self.np_random.uniform( -self._max_y / 3, self._max_y / 3) self.robot_pos = np.array([x_start, y_start]) # Initialize target position x_pos = 0.8 * self._max_x y_pos = 0.7 * self._max_y if self._random_target: margin = self.collision_margin * self._max_x + 0.08 ## 0.4 + 0.05 x_pos = self.np_random.uniform(self._min_x + margin, self._max_x - margin) y_pos = self.np_random.uniform(self._min_y + margin, self._max_y - margin) self.target_pos = np.array([x_pos, y_pos]) # self.walls = [wall_left, wall_bottom, wall_right, wall_top] self._observation = self.getObservation() if self.saver is not None: self.saver.reset(self._observation, self.getTargetPos(), self.getRobotPos()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation) return self._observation def getObservation(self): """ :return: (numpy array) """ self._observation = self.render("rgb_array") return self._observation def step(self, action): # True if it has bumped against a wall self._env_step_counter += 1 self.has_bumped = False if self._is_discrete: dv = DELTA_POS # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD) dx = [-dv, dv, 0, 0][action] dy = [0, 0, -dv, dv][action] real_action = np.array([dx, dy]) else: dv = DELTA_POS # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD) # scale action amplitude between -dv and dv real_action = np.maximum(np.minimum(action, 1), -1) * dv previous_pos = self.robot_pos.copy() self.robot_pos += real_action # Handle collisions for i, (limit, robot_dim) in enumerate( zip([self._max_y, self._max_x], [ROBOT_WIDTH, ROBOT_LENGTH])): margin = self.collision_margin * limit + robot_dim / 2 # margin += 0.02 # If it has bumped against a wall, stay at the previous position if self.robot_pos[i] <= margin or self.robot_pos[ i] >= limit - margin: self.has_bumped = True self.robot_pos = previous_pos break # Update mobile robot position on image self._observation = self.getObservation() # calculate the reward of this step distance = np.linalg.norm(self.getTargetPos() - self.robot_pos, 2) reward = 0 if distance <= REWARD_DIST_THRESHOLD: reward = 1 # Negative reward when it bumps into a wall if self.has_bumped: reward = -1 if self._shape_reward: reward = -distance done = self._env_step_counter > self.max_steps if self.saver is not None: self.saver.step(self._observation, action, reward, done, self.getRobotPos()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation), reward, done, {} return np.array(self._observation), reward, done, {} def render(self, mode='rgb_array'): """ mode is useless """ image = self.background_img.copy() # self.robot_pos = np.array([1.2, 3.6]) # ## get robot position on image (pixel position) # robot_img_pos = np.array([(self.robot_pos[1]/self._max_y)*self._height, (self.robot_pos[0]/self._max_x) * self._width], dtype=int) # h_st = robot_img_pos[0]-self.robot_img.shape[0]//2 #, robot_img_pos[0]+self.robot_img.shape[0]//2 # h_ed = self.robot_img.shape[0]+h_st # w_st = robot_img_pos[1]-self.robot_img.shape[1]//2 # w_ed = self.robot_img.shape[1]+w_st # # print(self.robot_pos) # ## HACK # mask = (np.mean(self.robot_img, axis=-1) < 250) # # import matplotlib.pyplot as plt # # plt.figure() # # plt.imshow(mask.astype(float)) # # plt.show() # image[h_st:h_ed, w_st:w_ed, :][mask] = self.robot_img[mask] # self.target_pos = np.array([3.6, 3.6]) # print(self.target_pos) for obj_pos, obj_img in zip([self.target_pos, self.robot_pos], [self.target_img, self.robot_img]): obj_img_pos = np.array([(obj_pos[1] / self._max_y) * self._height, (obj_pos[0] / self._max_x) * self._width], dtype=int) h_st = obj_img_pos[0] - obj_img.shape[0] // 2 h_ed = obj_img.shape[0] + h_st w_st = obj_img_pos[1] - obj_img.shape[1] // 2 w_ed = obj_img.shape[1] + w_st mask = (np.mean(obj_img, axis=-1) < 240) image[h_st:h_ed, w_st:w_ed, :][mask] = obj_img[mask] if self._renders: cv2.imshow( "Keep pressing (any key) to display or 'q'/'Esc' to quit.", image) key = cv2.waitKey(0) if key == ord('q') or key == 27: # 'q' or 'Esc' cv2.destroyAllWindows() raise KeyboardInterrupt return image[..., ::-1] def interactive(self, show_map=False): image = self.getObservation() cv2.imshow("image", image[..., ::-1]) while True: k = cv2.waitKey(0) if k == 27 or k == ord("q"): # press 'Esc' or 'q' to quit break elif k == ord("2") or k == 84: # down action = 3 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("6") or k == 83: # right action = 1 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("8") or k == 82: # up action = 2 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("4") or k == 81: # left action = 0 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) else: print("You are pressing the key: {}".format(k))
def __init__(self, urdf_root=pybullet_data.getDataPath(), renders=False, is_discrete=True, multi_view=False, name="kuka_button_gym", max_distance=0.8, action_repeat=1, shape_reward=False, action_joints=False, record_data=False, random_target=False, force_down=True, state_dim=-1, learn_states=False, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, srl_model="raw_pixels", **_): super(InmoovButtonGymEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._timestep = 1. / 240. # URDF PATH self._urdf_root = urdf_root self._urdf_sjtu = "../../urdf_robot/" self._action_repeat = action_repeat self._observation = [] self._env_step_counter = 0 self._renders = renders self._width = RENDER_WIDTH self._height = RENDER_HEIGHT self._cam_dist = 1.1 self._cam_yaw = 145 self._cam_pitch = -36 self._cam_roll = 0 self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._force_down = force_down self.camera_target_pos = (0.316, -0.2, -0.1) self._is_discrete = is_discrete self.terminated = False self.renderer = p.ER_TINY_RENDERER self.debug = True self.n_contacts = 0 self.state_dim = state_dim self.action_joints = action_joints self.relative_pos = RELATIVE_POS self.cuda = th.cuda.is_available() self.saver = None self.multi_view = multi_view self.verbose = verbose self.max_steps = MAX_STEPS self.n_steps_outside = 0 self.table_uid = None self.button_pos = None self.button_uid = None self._kuka = None self._inmoov = None self.action = None self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if self._renders: client_id = p.connect(p.SHARED_MEMORY) if client_id < 0: p.connect(p.GUI) p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33]) self.debug = True # Debug sliders for moving the camera self.x_slider = p.addUserDebugParameter("x_slider", -10, 10, self.camera_target_pos[0]) self.y_slider = p.addUserDebugParameter("y_slider", -10, 10, self.camera_target_pos[1]) self.z_slider = p.addUserDebugParameter("z_slider", -10, 10, self.camera_target_pos[2]) self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10, self._cam_dist) self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180, self._cam_yaw) self.pitch_slider = p.addUserDebugParameter("cam_pitch", -180, 180, self._cam_pitch) else: p.connect(p.DIRECT) global CONNECTED_TO_SIMULATOR CONNECTED_TO_SIMULATOR = True if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: if self.action_joints: # 7 angles for the arm rotation, from -1 to 1 #TODO:action_dim should be changed action_dim = 7 self._action_bound = 1 else: # 3 direction for the arm movement, from -1 to 1 action_dim = 3 self._action_bound = 1 action_high = np.array([self._action_bound] * action_dim) self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim()
class InmoovButtonGymEnv(SRLGymEnv): """ Gym wrapper for Kuka environment with a push button :param urdf_root: (str) Path to pybullet urdf files :param renders: (bool) Whether to display the GUI or not :param is_discrete: (bool) Whether to use discrete or continuous actions :param multi_view :(bool) if TRUE -> returns stacked images of the scene on 6 channels (two cameras) :param name: (str) name of the folder where recorded data will be stored :param max_distance: (float) Max distance between end effector and the button (for negative reward) :param action_repeat: (int) Number of timesteps an action is repeated (here it is equivalent to frameskip) :param shape_reward: (bool) Set to true, reward = -distance_to_goal :param action_joints: (bool) Set actions to apply to the joint space :param record_data: (bool) Set to true, record frames with the rewards. :param random_target: (bool) Set the button position to a random position on the table :param force_down: (bool) Set Down as the only vertical action allowed :param state_dim: (int) When learning states :param learn_states: (bool) :param verbose: (bool) Whether to print some debug info :param save_path: (str) location where the saved data should go :param env_rank: (int) the number ID of the environment :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model :param srl_model: (str) The SRL_model used """ def __init__(self, urdf_root=pybullet_data.getDataPath(), renders=False, is_discrete=True, multi_view=False, name="kuka_button_gym", max_distance=0.8, action_repeat=1, shape_reward=False, action_joints=False, record_data=False, random_target=False, force_down=True, state_dim=-1, learn_states=False, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, srl_model="raw_pixels", **_): super(InmoovButtonGymEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._timestep = 1. / 240. # URDF PATH self._urdf_root = urdf_root self._urdf_sjtu = "../../urdf_robot/" self._action_repeat = action_repeat self._observation = [] self._env_step_counter = 0 self._renders = renders self._width = RENDER_WIDTH self._height = RENDER_HEIGHT self._cam_dist = 1.1 self._cam_yaw = 145 self._cam_pitch = -36 self._cam_roll = 0 self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._force_down = force_down self.camera_target_pos = (0.316, -0.2, -0.1) self._is_discrete = is_discrete self.terminated = False self.renderer = p.ER_TINY_RENDERER self.debug = True self.n_contacts = 0 self.state_dim = state_dim self.action_joints = action_joints self.relative_pos = RELATIVE_POS self.cuda = th.cuda.is_available() self.saver = None self.multi_view = multi_view self.verbose = verbose self.max_steps = MAX_STEPS self.n_steps_outside = 0 self.table_uid = None self.button_pos = None self.button_uid = None self._kuka = None self._inmoov = None self.action = None self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if self._renders: client_id = p.connect(p.SHARED_MEMORY) if client_id < 0: p.connect(p.GUI) p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33]) self.debug = True # Debug sliders for moving the camera self.x_slider = p.addUserDebugParameter("x_slider", -10, 10, self.camera_target_pos[0]) self.y_slider = p.addUserDebugParameter("y_slider", -10, 10, self.camera_target_pos[1]) self.z_slider = p.addUserDebugParameter("z_slider", -10, 10, self.camera_target_pos[2]) self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10, self._cam_dist) self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180, self._cam_yaw) self.pitch_slider = p.addUserDebugParameter("cam_pitch", -180, 180, self._cam_pitch) else: p.connect(p.DIRECT) global CONNECTED_TO_SIMULATOR CONNECTED_TO_SIMULATOR = True if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: if self.action_joints: # 7 angles for the arm rotation, from -1 to 1 #TODO:action_dim should be changed action_dim = 7 self._action_bound = 1 else: # 3 direction for the arm movement, from -1 to 1 action_dim = 3 self._action_bound = 1 action_high = np.array([self._action_bound] * action_dim) self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() #elif self.srl_model == "joints": # self.state_dim = self.getJointsDim() #elif self.srl_model == "joints_position": # self.state_dim = self.getGroundTruthDim() + self.getJointsDim() #if self.srl_model == "raw_pixels": # self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) #else: # self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32) def getSRLState(self, observation): state = [] if self.srl_model in ["ground_truth"]: if self.relative_pos: state += list(self.getGroundTruth() - self.getTargetPos()) else: state += list(self.getGroundTruth()) if len(state) != 0: return np.array(state) else: self.srl_pipe[0].put((self.env_rank, observation)) return self.srl_pipe[1][self.env_rank].get() ''' if self.srl_model in ["ground_truth", "joints_position"]: if self.relative_pos: state += list(self.getGroundTruth() - self.getTargetPos()) else: state += list(self.getGroundTruth()) if self.srl_model in ["joints", "joints_position"]: state += list(self._kuka.joint_positions) if len(state) != 0: return np.array(state) else: self.srl_pipe[0].put((self.env_rank, observation)) return self.srl_pipe[1][self.env_rank].get()''' def getTargetPos(self): return self.button_pos @staticmethod def getJointsDim(): """ :return: (int) """ return 6 @staticmethod def getGroundTruthDim(): return 3 def getGroundTruth(self): return np.array(self.getArmPos()) def getArmPos(self): """ :return: ([float]) Position (x, y, z) of inmoov hand """ return p.getLinkState(self._inmoov.inmoov_id, self._inmoov.effectorId)[0] def reset(self): self.terminated = False self.n_contacts = 0 self.n_steps_outside = 0 p.resetSimulation() p.setPhysicsEngineParameter(numSolverIterations=150) p.setTimeStep(self._timestep) p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"), [0, 0, -1]) self.table_uid = p.loadURDF(os.path.join(self._urdf_root, "table/table.urdf"), 0.5000000, 0.00000, -.820000, 0.000000, 0.000000, 0.0, 1.0) # Initialize button position x_pos = 0.5 y_pos = 0 if self._random_target: x_pos += 0.15 * self.np_random.uniform(-1, 1) y_pos += 0.3 * self.np_random.uniform(-1, 1) self.button_uid = p.loadURDF("/urdf/simple_button.urdf", [x_pos, y_pos, Z_TABLE]) self.button_pos = np.array([x_pos, y_pos, Z_TABLE]) p.setGravity(0, 0, -10) self._inmoov = inmoov.Inmoov(urdf_path = self._urdf_sjtu) self._env_step_counter = 0 # Close the gripper and wait for the arm to be in rest position for _ in range(500): if self.action_joints: self._inmoov.applyAction(list(np.array(self._inmoov.joint_positions)[:7]) + [0, 0]) else: self._inmoov.applyAction([0, 0, 0, 0, 0]) p.stepSimulation() # Randomize init arm pos: take 5 random actions for _ in range(N_RANDOM_ACTIONS_AT_INIT): if self._is_discrete: action = [0, 0, 0, 0, 0] sign = 1 if self.np_random.rand() > 0.5 else -1 action_idx = self.np_random.randint(3) # dx, dy or dz action[action_idx] += sign * DELTA_V else: if self.action_joints: joints = np.array(self._kuka.joint_positions)[:7] joints += DELTA_THETA * self.np_random.normal(joints.shape) action = list(joints) + [0, 0] else: action = np.zeros(5) rand_direction = self.np_random.normal((3,)) # L2 normalize, so that the random direction is not too high or too low rand_direction /= np.linalg.norm(rand_direction, 2) action[:3] += DELTA_V_CONTINUOUS * rand_direction self._kuka.applyAction(list(action)) p.stepSimulation() self._observation = self.getExtendedObservation() self.button_pos = np.array(p.getLinkState(self.button_uid, BUTTON_LINK_IDX)[0]) self.button_pos[2] += BUTTON_DISTANCE_HEIGHT # Set the target position on the top of the button if self.saver is not None: self.saver.reset(self._observation, self.getTargetPos(), self.getGroundTruth()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation) return np.array(self._observation) def __del__(self): if CONNECTED_TO_SIMULATOR: p.disconnect() def getExtendedObservation(self): if getNChannels() > 3: self.multi_view = True self._observation = self.render("rgb_array") return self._observation def step(self, action): # if you choose to do nothing if action is None: if self.action_joints: return self.step2(list(np.array(self._kuka.joint_positions)[:7]) + [0, 0]) else: return self.step2([0, 0, 0, 0, 0]) self.action = action # For saver if self._is_discrete: dv = DELTA_V # velocity per physics step. # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD) dx = [-dv, dv, 0, 0, 0, 0][action] dy = [0, 0, -dv, dv, 0, 0][action] if self._force_down: dz = [0, 0, 0, 0, -dv, -dv][action] # Remove up action else: dz = [0, 0, 0, 0, -dv, dv][action] # da = [0, 0, 0, 0, 0, -0.1, 0.1][action] # end effector angle finger_angle = 0.0 # Close the gripper # real_action = [dx, dy, -0.002, da, finger_angle] real_action = [dx, dy, dz, 0, finger_angle] print("Action: [dx, dy, dz, 0, finger_angle]: {}".format(real_action)) else: if self.action_joints: arm_joints = np.array(self._kuka.joint_positions)[:7] d_theta = DELTA_THETA # Add noise to action d_theta += self.np_random.normal(0.0, scale=NOISE_STD_JOINTS) # append [0,0] for finger angles real_action = list(action * d_theta + arm_joints) + [0, 0] # TODO remove up action else: dv = DELTA_V_CONTINUOUS # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD_CONTINUOUS) dx = action[0] * dv dy = action[1] * dv if self._force_down: dz = -abs(action[2] * dv) # Remove up action else: dz = action[2] * dv finger_angle = 0.0 # Close the gripper real_action = [dx, dy, dz, 0, finger_angle] if self.verbose: print(np.array2string(np.array(real_action), precision=2)) return self.step2(real_action) def step2(self, action): """ :param action:([float]) """ # Apply force to the button p.setJointMotorControl2(self.button_uid, BUTTON_GLIDER_IDX, controlMode=p.POSITION_CONTROL, targetPosition=0.1) for i in range(self._action_repeat): self._kuka.applyAction(action) p.stepSimulation() if self._termination(): break self._env_step_counter += 1 self._observation = self.getExtendedObservation() if self._renders: time.sleep(self._timestep) reward = self._reward() done = self._termination() if self.saver is not None: self.saver.step(self._observation, self.action, reward, done, self.getGroundTruth()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation), reward, done, {} return np.array(self._observation), reward, done, {} def render(self, mode='human', close=False): if mode != "rgb_array": return np.array([]) camera_target_pos = self.camera_target_pos if self.debug: self._cam_dist = p.readUserDebugParameter(self.dist_slider) self._cam_yaw = p.readUserDebugParameter(self.yaw_slider) self._cam_pitch = p.readUserDebugParameter(self.pitch_slider) x = p.readUserDebugParameter(self.x_slider) y = p.readUserDebugParameter(self.y_slider) z = p.readUserDebugParameter(self.z_slider) camera_target_pos = (x, y, z) # self._cam_roll = p.readUserDebugParameter(self.roll_slider) # TODO: recompute view_matrix and proj_matrix only in debug mode view_matrix1 = p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=camera_target_pos, distance=self._cam_dist, yaw=self._cam_yaw, pitch=self._cam_pitch, roll=self._cam_roll, upAxisIndex=2) proj_matrix1 = p.computeProjectionMatrixFOV( fov=60, aspect=float(RENDER_WIDTH) / RENDER_HEIGHT, nearVal=0.1, farVal=100.0) (_, _, px1, _, _) = p.getCameraImage( width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix1, projectionMatrix=proj_matrix1, renderer=self.renderer) rgb_array1 = np.array(px1) if self.multi_view: # adding a second camera on the other side of the robot view_matrix2 = p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=(0.316, 0.316, -0.105), distance=1.05, yaw=32, pitch=-13, roll=0, upAxisIndex=2) proj_matrix2 = p.computeProjectionMatrixFOV( fov=60, aspect=float(RENDER_WIDTH) / RENDER_HEIGHT, nearVal=0.1, farVal=100.0) (_, _, px2, _, _) = p.getCameraImage( width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix2, projectionMatrix=proj_matrix2, renderer=self.renderer) rgb_array2 = np.array(px2) rgb_array_res = np.concatenate((rgb_array1[:, :, :3], rgb_array2[:, :, :3]), axis=2) else: rgb_array_res = rgb_array1[:, :, :3] return rgb_array_res def _termination(self): if self.terminated or self._env_step_counter > self.max_steps: self._observation = self.getExtendedObservation() return True return False def _reward(self): gripper_pos = self.getArmPos() distance = np.linalg.norm(self.button_pos - gripper_pos, 2) # print(distance) contact_points = p.getContactPoints(self.button_uid, self._kuka.kuka_uid, BUTTON_LINK_IDX) reward = int(len(contact_points) > 0) self.n_contacts += reward contact_with_table = len(p.getContactPoints(self.table_uid, self._kuka.kuka_uid)) > 0 if distance > self._max_distance or contact_with_table: reward = -1 self.n_steps_outside += 1 else: self.n_steps_outside = 0 if contact_with_table or self.n_contacts >= N_CONTACTS_BEFORE_TERMINATION \ or self.n_steps_outside >= N_STEPS_OUTSIDE_SAFETY_SPHERE: self.terminated = True if self._shape_reward: if self._is_discrete: return -distance else: # Button pushed if self.terminated and reward > 0: return 50 # out of bounds elif self.terminated and reward < 0: return -250 # anything else else: return -distance return reward
def __init__(self, renders=False, is_discrete=True, log_folder="robobo_log_folder", state_dim=-1, learn_states=False, srl_model="raw_pixels", record_data=False, shape_reward=False, env_rank=0, srl_pipe=None, img_shape=None): super(RoboboEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self.n_contacts = 0 use_ground_truth = srl_model == 'ground_truth' use_srl = srl_model != 'raw_pixels' self.use_srl = use_srl or use_ground_truth self.use_ground_truth = use_ground_truth self.use_joints = False self.relative_pos = RELATIVE_POS self._is_discrete = is_discrete self.observation = [] # Start simulation with first observation self._env_step_counter = 0 self.episode_terminated = False self.state_dim = state_dim self._renders = renders self._shape_reward = shape_reward self.cuda = th.cuda.is_available() self.target_pos = None self.saver = None if img_shape is None: self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH) else: self.img_shape = img_shape if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: action_dim = 2 self._action_bound = 1 action_bounds = np.array([self._action_bound] * action_dim) self.action_space = spaces.Box(-action_bounds, action_bounds, dtype=np.float32) # SRL model if self.use_srl: if use_ground_truth: self.state_dim = self.getGroundTruthDim() self.dtype = np.float32 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=self.dtype) else: self.dtype = np.uint8 self.observation_space = spaces.Box(low=0, high=255, shape=(self.img_shape[2], self.img_shape[1], 3), dtype=self.dtype) if record_data: print("Recording data...") self.saver = EpisodeSaver(log_folder, 0, self.state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states) # Initialize Baxter effector by connecting to the Gym bridge ROS node: self.context = zmq.Context() self.socket = self.context.socket(zmq.PAIR) self.socket.connect("tcp://{}:{}".format(HOSTNAME, SERVER_PORT)) # note: if takes too long, run first client, then server print("Waiting for server connection...") msg = self.socket.recv_json() print("Connected to server (received message: {})".format(msg)) self.action = [0, 0] self.reward = 0 self.robobo_pos = np.array([0, 0]) # Initialize the state if self._renders: self.image_plot = None
class RoboboEnv(SRLGymEnv): """ Robobo robot Environment (Gym wrapper for Robobo Gazebo environment) The goal of robobo is to go to the location on the table (signaled with a circle sticker on the table) :param renders: (bool) Whether to display the GUI or not :param is_discrete: (bool) true if action space is discrete vs continuous :param log_folder: (str) name of the folder where recorded data will be stored :param state_dim: (int) When learning states :param learn_states: (bool) :param record_data: (bool) Set to true, record frames with the rewards. :param shape_reward: (bool) Set to true, reward = -distance_to_goal :param env_rank: (int) the number ID of the environment :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model """ def __init__(self, renders=False, is_discrete=True, log_folder="robobo_log_folder", state_dim=-1, learn_states=False, srl_model="raw_pixels", record_data=False, shape_reward=False, env_rank=0, srl_pipe=None, img_shape=None): super(RoboboEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self.n_contacts = 0 use_ground_truth = srl_model == 'ground_truth' use_srl = srl_model != 'raw_pixels' self.use_srl = use_srl or use_ground_truth self.use_ground_truth = use_ground_truth self.use_joints = False self.relative_pos = RELATIVE_POS self._is_discrete = is_discrete self.observation = [] # Start simulation with first observation self._env_step_counter = 0 self.episode_terminated = False self.state_dim = state_dim self._renders = renders self._shape_reward = shape_reward self.cuda = th.cuda.is_available() self.target_pos = None self.saver = None if img_shape is None: self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH) else: self.img_shape = img_shape if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: action_dim = 2 self._action_bound = 1 action_bounds = np.array([self._action_bound] * action_dim) self.action_space = spaces.Box(-action_bounds, action_bounds, dtype=np.float32) # SRL model if self.use_srl: if use_ground_truth: self.state_dim = self.getGroundTruthDim() self.dtype = np.float32 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=self.dtype) else: self.dtype = np.uint8 self.observation_space = spaces.Box(low=0, high=255, shape=(self.img_shape[2], self.img_shape[1], 3), dtype=self.dtype) if record_data: print("Recording data...") self.saver = EpisodeSaver(log_folder, 0, self.state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states) # Initialize Baxter effector by connecting to the Gym bridge ROS node: self.context = zmq.Context() self.socket = self.context.socket(zmq.PAIR) self.socket.connect("tcp://{}:{}".format(HOSTNAME, SERVER_PORT)) # note: if takes too long, run first client, then server print("Waiting for server connection...") msg = self.socket.recv_json() print("Connected to server (received message: {})".format(msg)) self.action = [0, 0] self.reward = 0 self.robobo_pos = np.array([0, 0]) # Initialize the state if self._renders: self.image_plot = None def step(self, action): """ :action: (int) :return: (tensor (np.ndarray)) observation, int reward, bool done, dict extras) """ assert self.action_space.contains(action) # Convert int action to action in (x,y,z) space self.action = action self._env_step_counter += 1 # Send the action to the server self.socket.send_json({"command": "action", "action": self.action}) # Receive state data (position, etc), important to update state related values self.getEnvState() # Receive a camera image from the server self.observation = self.getObservation() done = self._hasEpisodeTerminated() if self.saver is not None: self.saver.step(self.observation, action, self.reward, done, self.getGroundTruth()) if self.use_srl: return self.getSRLState(self.observation), self.reward, done, {} else: return self.observation, self.reward, done, {} def getEnvState(self): """ Returns a dictionary containing info about the environment state. :return: (dict) state_data containing data related to the state: target_pos, robobo_pos and reward. """ state_data = self.socket.recv_json() self.reward = state_data["reward"] self.target_pos = np.array(state_data["target_pos"]) self.robobo_pos = np.array(state_data["position"]) return state_data def getObservation(self): """ Receive the observation image using a socket :return: (numpy ndarray) observation """ # Receive a camera image from the server self.observation = recvMatrix(self.socket) # Resize it: self.observation = cv2.resize(self.observation, (self.img_shape[2], self.img_shape[1]), interpolation=cv2.INTER_AREA) return self.observation def getTargetPos(self): """ :return (numpy array): Position of the target (button) """ return self.target_pos @staticmethod def getGroundTruthDim(): """ :return: (int) """ return 2 def getGroundTruth(self): """ Alias for getRoboboPos for compatibility between envs :return: (numpy array) """ return np.array(self.getRoboboPos()) def getRoboboPos(self): """ :return: ([float])-> np.ndarray Position (x, y, z) of Baxter left gripper """ return self.robobo_pos def reset(self): """ Reset the environment :return: (numpy ndarray) first observation of the env """ self.episode_terminated = False # Step count since episode start self._env_step_counter = 0 self.socket.send_json({"command": "reset"}) # Update state related variables, important step to get both data and # metadata that allow reading the observation image self.getEnvState() self.observation = self.getObservation() if self.saver is not None: self.saver.reset(self.observation, self.getTargetPos(), self.getGroundTruth()) if self.use_srl: return self.getSRLState(self.observation) else: return self.observation def _hasEpisodeTerminated(self): """ Returns True if the episode is over and False otherwise """ if self.episode_terminated or self._env_step_counter > MAX_STEPS: return True return False def closeServerConnection(self): """ To be called at the end of running the program, externally """ print("Robobo_env client exiting and closing socket...") self.socket.send_json({"command": "exit"}) self.socket.close() def render(self, mode='rgb_array'): """ :param mode: (str) :return: (numpy array) BGR image """ if mode != "rgb_array": print('render in human mode not yet supported') return np.array([]) if self._renders: plt.ion() # needed for interactive update if self.image_plot is None: plt.figure('Robobo RL') self.image_plot = plt.imshow(bgr2rgb(self.observation), cmap='gray') self.image_plot.axes.grid(False) else: self.image_plot.set_data(bgr2rgb(self.observation)) plt.draw() # Wait a bit, so that plot is visible plt.pause(0.0001) return self.observation
def __init__(self, renders=False, name="Omnirobot", is_discrete=True, save_path='srl_zoo/data/', state_dim=-1, learn_states=False, srl_model="raw_pixels", record_data=False, action_repeat=1, random_target=True, shape_reward=False, simple_continual_target=False, circular_continual_move=False, square_continual_move=False, eight_continual_move=False, short_episodes=False, state_init_override=None, env_rank=0, srl_pipe=None, img_shape=None, **_): super(OmniRobotEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) if action_repeat != 1: raise NotImplementedError self.server_port = SERVER_PORT + env_rank self.n_contacts = 0 use_ground_truth = srl_model == 'ground_truth' use_srl = srl_model != 'raw_pixels' self.use_srl = use_srl or use_ground_truth self.use_ground_truth = use_ground_truth self.use_joints = False self.relative_pos = RELATIVE_POS self._is_discrete = is_discrete self.observation = [] # Start simulation with first observation self._env_step_counter = 0 self.episode_terminated = False self.state_dim = state_dim if img_shape is None: self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH) else: self.img_shape = img_shape self._renders = renders self._shape_reward = shape_reward self.cuda = th.cuda.is_available() self.target_pos = None self.saver = None self._random_target = random_target self.simple_continual_target = simple_continual_target self.circular_continual_move = circular_continual_move self.square_continual_move = square_continual_move self.eight_continual_move = eight_continual_move self.short_episodes = short_episodes if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: action_dim = 2 self.action_space = RingBox(positive_low=ACTION_POSITIVE_LOW, positive_high=ACTION_POSITIVE_HIGH, negative_low=ACTION_NEGATIVE_LOW, negative_high=ACTION_NEGATIVE_HIGH, shape=np.array([action_dim]), dtype=np.float32) # SRL model if self.use_srl: if use_ground_truth: self.state_dim = self.getGroundTruthDim() self.dtype = np.float32 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=self.dtype) else: self.dtype = np.uint8 self.observation_space = spaces.Box(low=0, high=255, shape=(self.img_shape[2], self.img_shape[1], 3), dtype=self.dtype) if record_data: print("Recording data...") self.saver = EpisodeSaver(name, 0, self.state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if USING_OMNIROBOT_SIMULATOR: self.socket = OmniRobotSimulatorSocket( simple_continual_target=simple_continual_target, circular_continual_move=circular_continual_move, square_continual_move=square_continual_move, eight_continual_move=eight_continual_move, output_size=[self.img_shape[2], self.img_shape[1]], random_target=self._random_target, state_init_override=state_init_override) else: # Initialize Baxter effector by connecting to the Gym bridge ROS node: self.context = zmq.Context() self.socket = self.context.socket(zmq.PAIR) self.socket.connect("tcp://{}:{}".format(HOSTNAME, self.server_port)) # note: if takes too long, run first client, then server print("Waiting for server connection at port {}...".format( self.server_port)) # hide the output of server msg = self.socket.recv_json() print( "Connected to server on port {} (received message: {})".format( self.server_port, msg)) self.action = [0, 0] self.reward = 0 self.robot_pos = np.array([0, 0]) # Initialize the state if self._renders: self.image_plot = None
class OmniRobotEnv(SRLGymEnv): """ OmniRobot robot Environment (Gym wrapper for OmniRobot environment) The goal of Omnirobot is to go to the location on the table (signaled with a circle sticker on the table) :param renders: (bool) Whether to display the GUI or not :param is_discrete: (bool) true if action space is discrete vs continuous :param save_path: (str) name of the folder where recorded data will be stored :param state_dim: (int) When learning states :param learn_states: (bool) :param record_data: (bool) Set to true, record frames with the rewards. :param shape_reward: (bool) Set to true, reward = -distance_to_goal :param env_rank: (int) the number ID of the environment :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model """ def __init__(self, renders=False, name="Omnirobot", is_discrete=True, save_path='srl_zoo/data/', state_dim=-1, learn_states=False, srl_model="raw_pixels", record_data=False, action_repeat=1, random_target=True, shape_reward=False, simple_continual_target=False, circular_continual_move=False, square_continual_move=False, eight_continual_move=False, short_episodes=False, state_init_override=None, env_rank=0, srl_pipe=None, img_shape=None, **_): super(OmniRobotEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) if action_repeat != 1: raise NotImplementedError self.server_port = SERVER_PORT + env_rank self.n_contacts = 0 use_ground_truth = srl_model == 'ground_truth' use_srl = srl_model != 'raw_pixels' self.use_srl = use_srl or use_ground_truth self.use_ground_truth = use_ground_truth self.use_joints = False self.relative_pos = RELATIVE_POS self._is_discrete = is_discrete self.observation = [] # Start simulation with first observation self._env_step_counter = 0 self.episode_terminated = False self.state_dim = state_dim if img_shape is None: self.img_shape = (3, RENDER_HEIGHT, RENDER_WIDTH) else: self.img_shape = img_shape self._renders = renders self._shape_reward = shape_reward self.cuda = th.cuda.is_available() self.target_pos = None self.saver = None self._random_target = random_target self.simple_continual_target = simple_continual_target self.circular_continual_move = circular_continual_move self.square_continual_move = square_continual_move self.eight_continual_move = eight_continual_move self.short_episodes = short_episodes if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: action_dim = 2 self.action_space = RingBox(positive_low=ACTION_POSITIVE_LOW, positive_high=ACTION_POSITIVE_HIGH, negative_low=ACTION_NEGATIVE_LOW, negative_high=ACTION_NEGATIVE_HIGH, shape=np.array([action_dim]), dtype=np.float32) # SRL model if self.use_srl: if use_ground_truth: self.state_dim = self.getGroundTruthDim() self.dtype = np.float32 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=self.dtype) else: self.dtype = np.uint8 self.observation_space = spaces.Box(low=0, high=255, shape=(self.img_shape[2], self.img_shape[1], 3), dtype=self.dtype) if record_data: print("Recording data...") self.saver = EpisodeSaver(name, 0, self.state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if USING_OMNIROBOT_SIMULATOR: self.socket = OmniRobotSimulatorSocket( simple_continual_target=simple_continual_target, circular_continual_move=circular_continual_move, square_continual_move=square_continual_move, eight_continual_move=eight_continual_move, output_size=[self.img_shape[2], self.img_shape[1]], random_target=self._random_target, state_init_override=state_init_override) else: # Initialize Baxter effector by connecting to the Gym bridge ROS node: self.context = zmq.Context() self.socket = self.context.socket(zmq.PAIR) self.socket.connect("tcp://{}:{}".format(HOSTNAME, self.server_port)) # note: if takes too long, run first client, then server print("Waiting for server connection at port {}...".format( self.server_port)) # hide the output of server msg = self.socket.recv_json() print( "Connected to server on port {} (received message: {})".format( self.server_port, msg)) self.action = [0, 0] self.reward = 0 self.robot_pos = np.array([0, 0]) # Initialize the state if self._renders: self.image_plot = None def actionPolicyTowardTarget(self): """ :return: (int) action """ if abs(self.robot_pos[0] - self.target_pos[0]) > abs(self.robot_pos[1] - self.target_pos[1]): if self._is_discrete: return int( Move.FORWARD ) if self.robot_pos[0] < self.target_pos[0] else int( Move.BACKWARD) # forward # backward else: return DELTA_POS if self.robot_pos[0] < self.target_pos[ 0] else -DELTA_POS else: if self._is_discrete: # left # right return int( Move.LEFT ) if self.robot_pos[1] < self.target_pos[1] else int( Move.RIGHT) else: return DELTA_POS if self.robot_pos[1] < self.target_pos[ 1] else -DELTA_POS def step(self, action, generated_observation=None, action_proba=None, action_grid_walker=None): """ :param :action: (int) :param generated_observation: :param action_proba: :param action_grid_walker: # Whether or not we want to override the action with the one from a grid walker :return: (tensor (np.ndarray)) observation, int reward, bool done, dict extras) """ if action_grid_walker is None: action_to_step = action action_from_teacher = None else: action_to_step = action_grid_walker action_from_teacher = action assert self.action_space.contains(action_from_teacher) if not self._is_discrete: action_to_step = np.array(action_to_step) assert self.action_space.contains(action_to_step) # Convert int action to action in (x,y,z) space # serialize the action if isinstance(action_to_step, np.ndarray): self.action = action_to_step.tolist() elif hasattr(action_to_step, 'dtype'): # convert numpy type to python type self.action = action.item() else: self.action = action_to_step self._env_step_counter += 1 # Send the action to the server self.socket.send_json({ "command": "action", "action": self.action, "is_discrete": self._is_discrete, "step_counter": self._env_step_counter }) # Receive state data (position, etc), important to update state related values self.getEnvState() # Receive a camera image from the server self.observation = self.getObservation( ) if generated_observation is None else generated_observation * 255 done = self._hasEpisodeTerminated() self.render() if self.saver is not None: self.saver.step(self.observation, action_from_teacher if action_grid_walker is not None else action_to_step, self.reward, done, self.getGroundTruth(), action_proba=action_proba) old_observation = self.getObservation() if self.use_srl: return self.getSRLState( self.observation if generated_observation is None else old_observation), self.reward, done, {} else: return self.observation, self.reward, done, {} def getEnvState(self): """ Returns a dictionary containing info about the environment state. :return: (dict) state_data containing data related to the state: target_pos, robot_pos and reward. """ state_data = self.socket.recv_json() self.reward = state_data["reward"] self.target_pos = np.array(state_data["target_pos"]) self.robot_pos = np.array(state_data["position"]) return state_data def getObservation(self): """ Receive the observation image using a socket :return: (numpy ndarray) observation """ # Receive a camera image from the server self.observation = recvMatrix(self.socket) # Resize it: self.observation = cv2.resize(self.observation, (self.img_shape[2], self.img_shape[1]), interpolation=cv2.INTER_AREA) return self.observation def getTargetPos(self): """ :return (numpy array): Position of the target (button) """ return self.target_pos @staticmethod def getGroundTruthDim(): """ :return: (int) """ return 2 def getGroundTruth(self): """ Alias for getRobotPos for compatibility between envs :return: (numpy array) """ return np.array(self.getRobotPos()) def getRobotPos(self): """ :return: ([float])-> np.ndarray Position (x, y, z) of Baxter left gripper """ return self.robot_pos def reset(self, aligned=False, init_with_real=False, generated_observation=None, state_override=None): """ Reset the environment :param generated_observation: :param state_override: :return: (numpy ndarray) first observation of the env """ self.episode_terminated = False # Step count since episode start self._env_step_counter = 0 # set n contact count self.n_contacts = 0 self.socket.send_json({ "command": "reset", "step_counter": self._env_step_counter, "aligned": aligned }) # Update state related variables, important step to get both data and # metadata that allow reading the observation image self.getEnvState() if not init_with_real: self.robot_pos = np.array( [0, 0]) if state_override is None else state_override self.observation = self.getObservation( ) if generated_observation is None else generated_observation * 255 if self.saver is not None: self.saver.reset(self.observation, self.getTargetPos(), self.getGroundTruth()) old_observation = self.getObservation() if self.use_srl: return self.getSRLState(self.observation if generated_observation is None else old_observation) else: return self.observation def _hasEpisodeTerminated(self): """ Returns True if the episode is over and False otherwise """ if (self.episode_terminated or self._env_step_counter > MAX_STEPS) or \ (self.n_contacts >= N_CONTACTS_BEFORE_TERMINATION and self.short_episodes and self.simple_continual_target) or \ (self._env_step_counter > MAX_STEPS_CIRCULAR_TASK_SHORT_EPISODES and self.short_episodes and self.circular_continual_move): return True if np.abs(self.reward - REWARD_TARGET_REACH) < 0.000001: # reach the target self.n_contacts += 1 else: self.n_contacts += 0 return False def closeServerConnection(self): """ To be called at the end of running the program, externally """ print("Omnirobot_env client exiting and closing socket...") self.socket.send_json({"command": "exit"}) self.socket.close() def render(self, mode='rgb_array'): """ :param mode: (str) :return: (numpy array) BGR image """ if self._renders: if mode != "rgb_array": print('render in human mode not yet supported') return np.array([]) plt.ion() # needed for interactive update if self.image_plot is None: plt.figure('Omnirobot RL') self.initVisualizeBoundary() self.visualizeBoundary() self.image_plot = plt.imshow(self.observation_with_boundary, cmap='gray') self.image_plot.axes.grid(False) else: self.visualizeBoundary() self.image_plot.set_data(self.observation_with_boundary) plt.draw() # Wait a bit, so that plot is visible plt.pause(0.0001) return self.observation def initVisualizeBoundary(self): with open(CAMERA_INFO_PATH, 'r') as stream: try: contents = yaml.load(stream) camera_matrix = np.array( contents['camera_matrix']['data']).reshape((3, 3)) distortion_coefficients = np.array( contents['distortion_coefficients']['data']).reshape( (1, 5)) except yaml.YAMLError as exc: print(exc) # camera installation info r = R.from_euler('xyz', CAMERA_ROT_EULER_COORD_GROUND, degrees=True) camera_rot_mat_coord_ground = r.as_dcm() pos_transformer = PosTransformer(camera_matrix, distortion_coefficients, CAMERA_POS_COORD_GROUND, camera_rot_mat_coord_ground) self.boundary_coner_pixel_pos = np.zeros((2, 4)) # assume that image is undistorted self.boundary_coner_pixel_pos[:, 0] = \ pos_transformer.phyPosGround2PixelPos([MIN_X, MIN_Y], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos[:, 1] = \ pos_transformer.phyPosGround2PixelPos([MAX_X, MIN_Y], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos[:, 2] = \ pos_transformer.phyPosGround2PixelPos([MAX_X, MAX_Y], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos[:, 3] = \ pos_transformer.phyPosGround2PixelPos([MIN_X, MAX_Y], return_distort_image_pos=False).squeeze() # transform the corresponding points into cropped image self.boundary_coner_pixel_pos = self.boundary_coner_pixel_pos - ( np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)).reshape(2, 1) / 2.0 # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1]) self.boundary_coner_pixel_pos[ 0, :] *= self.img_shape[2] / CROPPED_SIZE[0] self.boundary_coner_pixel_pos[ 1, :] *= self.img_shape[1] / CROPPED_SIZE[1] self.boundary_coner_pixel_pos = np.around( self.boundary_coner_pixel_pos).astype(np.int) # Create square for vizu of objective in continual square task if self.square_continual_move: self.boundary_coner_pixel_pos_continual = np.zeros((2, 4)) # assume that image is undistorted self.boundary_coner_pixel_pos_continual[:, 0] = \ pos_transformer.phyPosGround2PixelPos([-RADIUS, -RADIUS], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos_continual[:, 1] = \ pos_transformer.phyPosGround2PixelPos([RADIUS, -RADIUS], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos_continual[:, 2] = \ pos_transformer.phyPosGround2PixelPos([RADIUS, RADIUS], return_distort_image_pos=False).squeeze() self.boundary_coner_pixel_pos_continual[:, 3] = \ pos_transformer.phyPosGround2PixelPos([-RADIUS, RADIUS], return_distort_image_pos=False).squeeze() # transform the corresponding points into cropped image self.boundary_coner_pixel_pos_continual = self.boundary_coner_pixel_pos_continual - ( np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)).reshape( 2, 1) / 2.0 # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1]) self.boundary_coner_pixel_pos_continual[ 0, :] *= self.img_shape[2] / CROPPED_SIZE[0] self.boundary_coner_pixel_pos_continual[ 1, :] *= self.img_shape[1] / CROPPED_SIZE[1] self.boundary_coner_pixel_pos_continual = np.around( self.boundary_coner_pixel_pos_continual).astype(np.int) elif self.circular_continual_move: self.center_coordinates = \ pos_transformer.phyPosGround2PixelPos([0, 0], return_distort_image_pos=False).squeeze() self.center_coordinates = self.center_coordinates - ( np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)) / 2.0 # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1]) self.center_coordinates[0] *= self.img_shape[2] / CROPPED_SIZE[0] self.center_coordinates[1] *= self.img_shape[1] / CROPPED_SIZE[1] self.center_coordinates = np.around( self.center_coordinates).astype(np.int) # Points to convert radisu in env space self.boundary_coner_pixel_pos_continual = \ pos_transformer.phyPosGround2PixelPos([0, RADIUS], return_distort_image_pos=False).squeeze() # transform the corresponding points into cropped image self.boundary_coner_pixel_pos_continual = self.boundary_coner_pixel_pos_continual - ( np.array(ORIGIN_SIZE) - np.array(CROPPED_SIZE)) / 2.0 # transform the corresponding points into resized image (self.img_shape[2], self.img_shape[1]) self.boundary_coner_pixel_pos_continual[ 0] *= self.img_shape[2] / CROPPED_SIZE[0] self.boundary_coner_pixel_pos_continual[ 1] *= self.img_shape[1] / CROPPED_SIZE[1] self.boundary_coner_pixel_pos_continual = np.around( self.boundary_coner_pixel_pos_continual).astype(np.int) def visualizeBoundary(self): """ visualize the unvisible boundary, should call initVisualizeBoundary first """ self.observation_with_boundary = self.observation.copy() # Add boundary continual if self.square_continual_move: for idx in range(4): idx_next = idx + 1 cv2.line( self.observation_with_boundary, tuple(self.boundary_coner_pixel_pos_continual[:, idx]), tuple(self.boundary_coner_pixel_pos_continual[:, idx_next % 4]), (0, 0, 200), 1) elif self.circular_continual_move: radius_converted = np.linalg.norm( self.center_coordinates - self.boundary_coner_pixel_pos_continual) cv2.circle(self.observation_with_boundary, tuple(self.center_coordinates), np.float32(radius_converted), (0, 0, 200), 2) # Add boundary of env for idx in range(4): idx_next = idx + 1 cv2.line(self.observation_with_boundary, tuple(self.boundary_coner_pixel_pos[:, idx]), tuple(self.boundary_coner_pixel_pos[:, idx_next % 4]), (200, 0, 0), 1)
def __init__(self, urdf_root=pybullet_data.getDataPath(), renders=False, is_discrete=True, name="mobile_robot", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, force_down=True, state_dim=-1, learn_states=False, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, fpv=False, img_shape=None, **_): super(MobileRobotGymEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._timestep = 1. / 240. self._urdf_root = urdf_root self._observation = [] self._env_step_counter = 0 self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] self._cam_dist = 4.4 self._cam_yaw = 90 self._cam_pitch = -90 self._cam_roll = 0 self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._force_down = force_down self.camera_target_pos = (2, 2, 0) self._is_discrete = is_discrete self.terminated = False self.renderer = p.ER_TINY_RENDERER self.debug = False self.n_contacts = 0 self.state_dim = state_dim self.relative_pos = RELATIVE_POS self.cuda = th.cuda.is_available() self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(3) self.robot_uid = None self.target_pos = np.zeros(3) self.target_uid = None # Boundaries of the square env self._min_x, self._max_x = 0, 4 self._min_y, self._max_y = 0, 4 self.has_bumped = False # Used for handling collisions self.collision_margin = 0.1 self.walls = None self.fpv = fpv self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if self._renders: client_id = p.connect(p.SHARED_MEMORY) if client_id < 0: p.connect(p.GUI) p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33]) self.debug = True # Debug sliders for moving the camera self.x_slider = p.addUserDebugParameter("x_slider", -10, 10, self.camera_target_pos[0]) self.y_slider = p.addUserDebugParameter("y_slider", -10, 10, self.camera_target_pos[1]) self.z_slider = p.addUserDebugParameter("z_slider", -10, 10, self.camera_target_pos[2]) self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10, self._cam_dist) self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180, self._cam_yaw) self.pitch_slider = p.addUserDebugParameter( "cam_pitch", -180, 180, self._cam_pitch) else: p.connect(p.DIRECT) global CONNECTED_TO_SIMULATOR CONNECTED_TO_SIMULATOR = True if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32)
class MobileRobotGymEnv(SRLGymEnv): """ Gym wrapper for Mobile Robot environment WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded :param urdf_root: (str) Path to pybullet urdf files :param renders: (bool) Whether to display the GUI or not :param is_discrete: (bool) Whether to use discrete or continuous actions :param name: (str) name of the folder where recorded data will be stored :param max_distance: (float) Max distance between end effector and the button (for negative reward) :param shape_reward: (bool) Set to true, reward = -distance_to_goal :param record_data: (bool) Set to true, record frames with the rewards. :param random_target: (bool) Set the target to a random position :param state_dim: (int) When learning states :param learn_states: (bool) :param verbose: (bool) Whether to print some debug info :param save_path: (str) location where the saved data should go :param env_rank: (int) the number ID of the environment :param pipe: (Queue, [Queue]) contains the input and output of the SRL model :param fpv: (bool) enable first person view camera :param srl_model: (str) The SRL_model used """ def __init__(self, urdf_root=pybullet_data.getDataPath(), renders=False, is_discrete=True, name="mobile_robot", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, force_down=True, state_dim=-1, learn_states=False, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, fpv=False, img_shape=None, **_): super(MobileRobotGymEnv, self).__init__(srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe) self._timestep = 1. / 240. self._urdf_root = urdf_root self._observation = [] self._env_step_counter = 0 self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] self._cam_dist = 4.4 self._cam_yaw = 90 self._cam_pitch = -90 self._cam_roll = 0 self._max_distance = max_distance self._shape_reward = shape_reward self._random_target = random_target self._force_down = force_down self.camera_target_pos = (2, 2, 0) self._is_discrete = is_discrete self.terminated = False self.renderer = p.ER_TINY_RENDERER self.debug = False self.n_contacts = 0 self.state_dim = state_dim self.relative_pos = RELATIVE_POS self.cuda = th.cuda.is_available() self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(3) self.robot_uid = None self.target_pos = np.zeros(3) self.target_uid = None # Boundaries of the square env self._min_x, self._max_x = 0, 4 self._min_y, self._max_y = 0, 4 self.has_bumped = False # Used for handling collisions self.collision_margin = 0.1 self.walls = None self.fpv = fpv self.srl_model = srl_model if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) if self._renders: client_id = p.connect(p.SHARED_MEMORY) if client_id < 0: p.connect(p.GUI) p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33]) self.debug = True # Debug sliders for moving the camera self.x_slider = p.addUserDebugParameter("x_slider", -10, 10, self.camera_target_pos[0]) self.y_slider = p.addUserDebugParameter("y_slider", -10, 10, self.camera_target_pos[1]) self.z_slider = p.addUserDebugParameter("z_slider", -10, 10, self.camera_target_pos[2]) self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10, self._cam_dist) self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180, self._cam_yaw) self.pitch_slider = p.addUserDebugParameter( "cam_pitch", -180, 180, self._cam_pitch) else: p.connect(p.DIRECT) global CONNECTED_TO_SIMULATOR CONNECTED_TO_SIMULATOR = True if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32) def getTargetPos(self): # Return only the [x, y] coordinates return self.target_pos[:2] # @staticmethod def getGroundTruthDim(self): """ The new convention for the ground truth (GT): GT should include target position under random target setting. """ ## HACK : Monkey-Patch, shit solution to solve problem. if not self._random_target: return 2 else: return 4 def getRobotPos(self): # Return only the [x, y] coordinates return np.array(self.robot_pos)[:2] def getGroundTruth(self): """ The new convention for the ground truth (GT): GT should include target position under random target setting. A better solution would be "change all the environment files, especially srl_env.py" !!! """ ## HACK: Monkey-Patch, shit solution to solve problem. robot_pos = self.getRobotPos() if self._random_target: if self.relative_pos: ## HACK here! Change srl_env.py and all the other envs in the future !!! TODO TODO TODO return robot_pos else: ## check 'envs.observation_space' in rl_baselines/base_classes.py (before self.model.learn) !!! target_pos = self.getTargetPos() return np.concatenate([robot_pos, target_pos], axis=0) else: if self.relative_pos: ## HACK here! Change srl_env.py and all the other envs in the future !!! TODO TODO TODO return robot_pos else: return robot_pos def reset(self): self.terminated = False p.resetSimulation() p.setPhysicsEngineParameter(numSolverIterations=150) p.setTimeStep(self._timestep) p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"), [0, 0, 0]) p.setGravity(0, 0, -10) # Init the robot randomly x_start = self._max_x / 2 + self.np_random.uniform( -self._max_x / 3, self._max_x / 3) y_start = self._max_y / 2 + self.np_random.uniform( -self._max_y / 3, self._max_y / 3) self.robot_pos = np.array([x_start, y_start, 0]) # Initialize target position x_pos = 0.9 * self._max_x y_pos = self._max_y * 3 / 4 if self._random_target: margin = 0.1 * self._max_x x_pos = self.np_random.uniform(self._min_x + margin, self._max_x - margin) y_pos = self.np_random.uniform(self._min_y + margin, self._max_y - margin) self.target_uid = p.loadURDF("/urdf/cylinder.urdf", [x_pos, y_pos, 0], useFixedBase=True) self.target_pos = np.array([x_pos, y_pos, 0]) # Add walls # Path to the urdf file wall_urdf = "/urdf/wall.urdf" # Rgba colors red, green, blue = [0.8, 0, 0, 1], [0, 0.8, 0, 1], [0, 0, 0.8, 1] wall_left = p.loadURDF(wall_urdf, [self._max_x / 2, 0, 0], useFixedBase=True) # Change color p.changeVisualShape(wall_left, -1, rgbaColor=red) # getQuaternionFromEuler -> define orientation wall_bottom = p.loadURDF(wall_urdf, [self._max_x, self._max_y / 2, 0], p.getQuaternionFromEuler([0, 0, np.pi / 2]), useFixedBase=True) wall_right = p.loadURDF(wall_urdf, [self._max_x / 2, self._max_y, 0], useFixedBase=True) p.changeVisualShape(wall_right, -1, rgbaColor=green) wall_top = p.loadURDF(wall_urdf, [self._min_x, self._max_y / 2, 0], p.getQuaternionFromEuler([0, 0, np.pi / 2]), useFixedBase=True) p.changeVisualShape(wall_top, -1, rgbaColor=blue) self.walls = [wall_left, wall_bottom, wall_right, wall_top] # Add mobile robot self.robot_uid = p.loadURDF(os.path.join(self._urdf_root, "racecar/racecar.urdf"), self.robot_pos, useFixedBase=True) self._env_step_counter = 0 for _ in range(50): p.stepSimulation() self._observation = self.getObservation() if self.saver is not None: self.saver.reset(self._observation, self.getTargetPos(), self.getRobotPos()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation) return np.array(self._observation) def __del__(self): if CONNECTED_TO_SIMULATOR: p.disconnect() def getObservation(self): """ :return: (numpy array) """ self._observation = self.render("rgb_array") return self._observation def step(self, action): # True if it has bumped against a wall self.has_bumped = False if self._is_discrete: dv = DELTA_POS # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD) dx = [-dv, dv, 0, 0][action] dy = [0, 0, -dv, dv][action] real_action = np.array([dx, dy]) else: dv = DELTA_POS # Add noise to action dv += self.np_random.normal(0.0, scale=NOISE_STD) # scale action amplitude between -dv and dv real_action = np.maximum(np.minimum(action, 1), -1) * dv if self.verbose: print(np.array2string(np.array(real_action), precision=2)) previous_pos = self.robot_pos.copy() self.robot_pos[:2] += real_action # Handle collisions for i, (limit, robot_dim) in enumerate( zip([self._max_x, self._max_y], [ROBOT_LENGTH, ROBOT_WIDTH])): margin = self.collision_margin + robot_dim / 2 # If it has bumped against a wall, stay at the previous position if self.robot_pos[i] < margin or self.robot_pos[i] > limit - margin: self.has_bumped = True self.robot_pos = previous_pos break # Update mobile robot position p.resetBasePositionAndOrientation(self.robot_uid, self.robot_pos, [0, 0, 0, 1]) p.stepSimulation() self._env_step_counter += 1 self._observation = self.getObservation() reward = self._reward() done = self._termination() if self.saver is not None: self.saver.step(self._observation, action, reward, done, self.getRobotPos()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation), reward, done, {} return np.array(self._observation), reward, done, {} def render(self, mode='human', close=False): if mode != "rgb_array": return np.array([]) camera_target_pos = self.camera_target_pos if self.debug: self._cam_dist = p.readUserDebugParameter(self.dist_slider) self._cam_yaw = p.readUserDebugParameter(self.yaw_slider) self._cam_pitch = p.readUserDebugParameter(self.pitch_slider) x = p.readUserDebugParameter(self.x_slider) y = p.readUserDebugParameter(self.y_slider) z = p.readUserDebugParameter(self.z_slider) camera_target_pos = (x, y, z) # TODO: recompute view_matrix and proj_matrix only in debug mode view_matrix = p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=camera_target_pos, distance=self._cam_dist, yaw=self._cam_yaw, pitch=self._cam_pitch, roll=self._cam_roll, upAxisIndex=2) proj_matrix = p.computeProjectionMatrixFOV(fov=60, aspect=float(self._width) / self._height, nearVal=0.1, farVal=100.0) (_, _, px1, _, _) = p.getCameraImage(width=self._width, height=self._height, viewMatrix=view_matrix, projectionMatrix=proj_matrix, renderer=self.renderer) rgb_array = np.array(px1) rgb_array_res = rgb_array[:, :, :3] # if first person view, then stack the obersvation from the car camera if self.fpv: # move camera view_matrix = p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=(self.robot_pos[0] - 0.25, self.robot_pos[1], 0.15), distance=0.3, yaw=self._cam_yaw, pitch=-17, roll=self._cam_roll, upAxisIndex=2) proj_matrix = p.computeProjectionMatrixFOV( fov=90, aspect=float(self._width) / self._height, nearVal=0.1, farVal=100.0) # get and stack image (_, _, px1, _, _) = p.getCameraImage(width=self._width, height=self._height, viewMatrix=view_matrix, projectionMatrix=proj_matrix, renderer=self.renderer) rgb_array = np.array(px1) rgb_array_res = np.dstack([rgb_array_res, rgb_array[:, :, :3]]) return rgb_array_res def _termination(self): """ :return: (bool) """ if self.terminated or self._env_step_counter > self.max_steps: self._observation = self.getObservation() return True return False def _reward(self): """ :return: (float) """ # Distance to target distance = np.linalg.norm(self.getTargetPos() - self.robot_pos[:2], 2) reward = 0 if distance <= REWARD_DIST_THRESHOLD: reward = 1 # self.terminated = True # Negative reward when it bumps into a wall if self.has_bumped: reward = -1 if self._shape_reward: return -distance return reward
class LabyrinthEnv3(SRLGymEnv): """ This Labyrinth environment could can at speed 36,000 FPS on 10 CPUs (Intel i9-9900K) Labyrinth-v3: add two Palm trees as obstacle (could be destroyed) add two Oak barrel as obstacle (could be moved and destroyed against wall) :param name: (str) name of the folder where recorded data will be stored :param max_distance: (float) Max distance between end effector and the button (for negative reward) :param record_data: (bool) Set to true, record frames with the rewards. :param random_target: (bool) Set the target to a random position :param state_dim: (int) When learning states :param verbose: (bool) Whether to print some debug info :param save_path: (str) location where the saved data should go :param env_rank: (int) the number ID of the environment :param pipe: (Queue, [Queue]) contains the input and output of the SRL model :param srl_model: (str) The SRL_model used """ def __init__(self, renders=False, is_discrete=True, name="labyrinth", max_distance=1.6, shape_reward=False, record_data=False, srl_model="raw_pixels", random_target=False, state_dim=-1, verbose=False, save_path='srl_zoo/data/', env_rank=0, srl_pipe=None, img_shape=None, **_): super(LabyrinthEnv3, self).__init__(srl_model=srl_model, relative_pos=False, env_rank=env_rank, srl_pipe=srl_pipe) self._observation = [] self._renders = renders self.img_shape = img_shape # channel first !! if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] # self._shape_reward = shape_reward self._random_target = random_target self.state_dim = state_dim self.saver = None self.verbose = verbose self.max_steps = MAX_STEPS self.robot_pos = np.zeros(2, dtype=int) # index on the self.map array self.target_pos = [ ] # list of indexes (target positions) on the self.map array self.previous_robot_pos = None self.has_bumped = False # Used for handling collisions self.count_collected_tresors = 0 self._env_step_counter = 0 self.srl_model = srl_model self.maze_size = MAZE_SIZE self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) if record_data: self.saver = EpisodeSaver(name, max_distance, state_dim, globals_=getGlobals(), path=save_path) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32) def create_map(self): """ -1 is wall, 0 is free space, 1 is key(tresor), 2 is robot, 3 is obstacle (Palm tree) 4 is oak """ # Create map without robot # put walls self.map = np.zeros((self.maze_size, self.maze_size)) self.map[:, 0] = -1 self.map[:, -1] = -1 self.map[0, :] = -1 self.map[-1, :] = -1 self.map[:3, 4] = -1 self.map[-3:, 4] = -1 # put tresors (targets) self.target_pos = [] self.obstacle_pos = [] self.oak_pos = [] if not self._random_target: self.target_pos.append(np.array([1, -2], dtype=int)) self.target_pos.append(np.array([-2, 1], dtype=int)) self.obstacle_pos.append(np.array([2, 2], dtype=int)) else: valid_target_pos_list = [] for i in range(self.maze_size): for j in range(self.maze_size): if self.map[i, j] == 0: valid_target_pos_list.append( np.array([i, j], dtype=int)) obstacle_ind = self.np_random.choice(np.arange( len(valid_target_pos_list)), 2, replace=False) for index in sorted(obstacle_ind, reverse=True): self.obstacle_pos.append(valid_target_pos_list.pop(index)) oak_ind = self.np_random.choice(np.arange( len(valid_target_pos_list)), 2, replace=False) for index in sorted(oak_ind, reverse=True): self.oak_pos.append(valid_target_pos_list.pop(index)) for index in self.np_random.choice( np.arange(len(valid_target_pos_list)), 2, replace=False): # randomly choose two targets self.target_pos.append(valid_target_pos_list[index]) for key_pos in self.target_pos: self.map[key_pos[0], key_pos[1]] = 1 for key_pos in self.obstacle_pos: self.map[key_pos[0], key_pos[1]] = 3 for key_pos in self.oak_pos: self.map[key_pos[0], key_pos[1]] = 4 valid_robot_pos_list = [] for i in range(self.maze_size): for j in range(self.maze_size): if self.map[i, j] == 0: valid_robot_pos_list.append(np.array([i, j], dtype=int)) # load images target_img_ori = cv2.imread("./environments/labyrinth/tresors_128.png") robot_img_ori = cv2.imread("./environments/labyrinth/corsair_128.png") palm_img_ori = cv2.imread("./environments/labyrinth/palm_128.jpg") oak_img_ori = cv2.imread("./environments/labyrinth/oak_128.jpg") map_h, map_w = self.map.shape self.square_size = int(self._height / map_h) self.target_img = cv2.resize( target_img_ori, (self.square_size, self.square_size))[..., ::-1] self.robot_img = cv2.resize( robot_img_ori, (self.square_size, self.square_size))[..., ::-1] self.palm_img = cv2.resize( palm_img_ori, (self.square_size, self.square_size))[..., ::-1] self.oak_img = cv2.resize( oak_img_ori, (self.square_size, self.square_size))[..., ::-1] return valid_robot_pos_list def getGroundTruthDim(self): """ The new convention for the ground truth (GT): GT should include target position under random target setting. """ return self.maze_size**2 def getGroundTruth(self): return self.map.ravel() def reset(self): self.count_collected_tresors = 0 self._env_step_counter = 0 self.has_bumped = False self.previous_robot_pos = None self._observation = None valid_robot_pos_list = self.create_map() # put robot self.robot_pos = valid_robot_pos_list[self.np_random.choice( np.arange(len(valid_robot_pos_list)))] self.map[self.robot_pos[0], self.robot_pos[1]] = 2 self._observation = self.getObservation(start=True) if self.saver is not None: self.saver.reset(self._observation, np.zeros(2), np.zeros(2)) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation) return self._observation def getObservation(self, start=False): """ :return: (numpy array) """ self._observation = self.render("rgb_array", start=start) return self._observation def valid_action(self, action): """ The most important function. It's define the rule of Labyrinth check whether an action is valid return (bool) """ if action == 0: real_action = np.array([1, 0]) elif action == 1: real_action = np.array([0, 1]) elif action == 2: real_action = np.array([-1, 0]) elif action == 3: real_action = np.array([0, -1]) else: raise NotImplementedError next_pos = self.robot_pos + real_action # import ipdb; ipdb.set_trace() valid = np.all(next_pos >= 0) and np.all( next_pos < self.maze_size) and (self.map[next_pos[0], next_pos[1]] != -1) return valid, next_pos, real_action def step(self, action): ## action = 0, 1, 2, 3 # True if it has bumped against a wall self._env_step_counter += 1 self.has_bumped = False previous_pos = self.robot_pos.copy() self.previous_robot_pos = previous_pos valid, next_pos, real_action = self.valid_action(action) self.has_bumped = not valid if self.has_bumped: reward = -1 elif (self.map[next_pos[0], next_pos[1]] == 1): self.count_collected_tresors += 1 reward = 10 elif (self.map[next_pos[0], next_pos[1]] == 4): next_next_pos = next_pos + real_action if self.map[next_next_pos[0], next_next_pos[1]] == -1: # it's wall pass # nothing change else: self.map[next_next_pos[0], next_next_pos[1]] = 4 reward = -0.1 else: reward = -0.1 done = (self.count_collected_tresors == len( self.target_pos)) or (self._env_step_counter > self.max_steps) if self.has_bumped: # no need to update observation pass else: self.map[previous_pos[0], previous_pos[1]] = 0 # free space # update robot position and the map self.robot_pos = next_pos self.map[self.robot_pos[0], self.robot_pos[1]] = 2 # update observation self._observation = self.getObservation() if self.saver is not None: # HACK TODO TODO: used for the estimation of GTC (ground truth correlation) if reward >= 1: discret_reward = 1 elif reward <= -1: discret_reward = -1 else: discret_reward = 0 self.saver.step(self._observation, action, discret_reward, done, np.zeros(2)) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation), reward, done, {} return self._observation, reward, done, {} def render(self, mode='rgb_array', start=False): # [OPT] this part could be optimized again if needed, but it's not necessary, because 36,000 FPS # has already been ~5 times faster then 7000 FPS of PPO2 (with 10 CPUs). if self._observation is None or (isinstance(self._observation, list) and len( self._observation) == 0) or start: previous_obs = 255 * np.ones( (self._height, self._width, 3), dtype=np.uint8) else: previous_obs = self._observation map_h, map_w = self.map.shape square_size = self.square_size for i in range(map_h): for j in range(map_w): if self.map[i, j] == -1: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = 0 elif self.map[i, j] == 1: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = self.target_img elif self.map[i, j] == 2: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = self.robot_img elif self.map[i, j] == 3: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = self.palm_img elif self.map[i, j] == 4: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = self.oak_img elif self.map[i, j] == 0: # print(self.previous_robot_pos) if self.previous_robot_pos is not None and i == self.previous_robot_pos[ 0] and j == self.previous_robot_pos[1]: previous_obs[i * square_size:(i + 1) * square_size, j * square_size:(j + 1) * square_size, :] = 255 else: pass else: raise NotImplementedError if self._renders: cv2.imshow( "Keep pressing (any key) to display or 'q'/'Esc' to quit.", previous_obs[..., ::-1]) key = cv2.waitKey(0) if key == ord('q') or key == 27: # 'q' or 'Esc' cv2.destroyAllWindows() raise KeyboardInterrupt return previous_obs def interactive(self, show_map=False): image = self.getObservation(start=True) cv2.imshow("image", image[..., ::-1]) while True: k = cv2.waitKey(0) if k == 27 or k == ord("q"): # press 'Esc' or 'q' to quit break elif k == ord("2") or k == 84: # down action = 0 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("6") or k == 83: # right action = 1 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("8") or k == 82: # up action = 2 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) elif k == ord("4") or k == 81: # left action = 3 image, reward, done, _ = self.step(action) print("Action: {}, Reward: {}, Done: {}".format( action, reward, done)) if show_map: print(self.map) cv2.imshow("image", image[..., ::-1]) else: print("You are pressing the key: {}".format(k))
class CarRacingEnv(GymCarRacing, SRLGymEnv): def __init__(self, name="car_racing", renders=False, record_data=False, is_discrete=True, state_dim=-1, learn_states=False, save_path='srl_zoo/data/', srl_model="raw_pixels", env_rank=0, srl_pipe=None, lookahead=5, **_): """ Gym wrapper for Racing car environment WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded :param name: (str) name of the folder where recorded data will be stored :param renders: (bool) Whether to display the GUI or not :param record_data: (bool) Set to true, record frames with the rewards. :param is_discrete: (bool) Whether to use discrete or continuous actions :param state_dim: (int) When learning states :param learn_states: (bool) :param save_path: (str) location where the saved data should go :param srl_model: (str) The SRL_model used :param env_rank: (int) the number ID of the environment :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model :param lookahead: (int) How many segments ahead of the current position of the track should the target be """ SRLGymEnv.__init__(self, srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe, img_shape=None) GymCarRacing.__init__(self) self._renders = renders self.img_shape = img_shape if self.img_shape is None: self._width = RENDER_WIDTH self._height = RENDER_HEIGHT else: self._height, self._width = self.img_shape[1:] self._is_discrete = is_discrete self.lookahead = lookahead self.relative_pos = RELATIVE_POS self._env_step_counter = 0 self._observation = None self.saver = None if record_data: self.saver = EpisodeSaver(name, None, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS, learn_states=learn_states, path=save_path) # Accelerate, brake, stear left, stear right if self._is_discrete: self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) else: self.action_space = spaces.Box(low=-1, high=1, shape=(3, ), dtype=np.float32) if self.srl_model == "ground_truth": self.state_dim = self.getGroundTruthDim() if self.srl_model == "raw_pixels": self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8) else: self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim, ), dtype=np.float32) def getTargetPos(self): # get the nearest track segment to the current position # then return the track segment position that is ahead of the nearest track segement nearest_idx = np.argmin( list( map( lambda a: np.sqrt( np.sum( ((list(a[2:4]) + [0, 0, 0]) - self.getGroundTruth( ))**2)), self.track))) return np.array( list(self.track[(nearest_idx + self.lookahead) % len(self.track)][2:4]) + [0, 0, 0]) @staticmethod def getGroundTruthDim(): return 5 def getGroundTruth(self): # the car's current x,y position, angle, speed and angular speed return np.array( list(self.car.__dict__["hull"].position) + [ self.car.__dict__["hull"].angle, self.car.__dict__["hull"]. inertia, self.car.__dict__["hull"].angularVelocity ]) def getObservation(self): """ :return: (numpy array) """ self._observation = self.render("rgb_array") self._observation = cv2.resize(self._observation, (self._width, self._height)) return self._observation def reset(self): super().reset() self._env_step_counter = 0 self.getObservation() if self.saver is not None: self.saver.reset(self._observation, self.getTargetPos(), self.getGroundTruth()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation) return self._observation def step(self, action): if action is not None: if self._is_discrete: self.car.steer([-1, 1, 0, 0][action]) self.car.gas([0, 0, 1, 0][action]) self.car.brake([0, 0, 0, 1][action]) else: self.car.steer(-action[0]) self.car.gas(action[1]) self.car.brake(action[2]) self.car.step(1.0 / FPS) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS self.getObservation() step_reward = 0 done = False if action is not None: # First step without action, called from reset() self.reward -= 0.1 self._env_step_counter += 1 # We actually don't want to count fuel spent, we want car to be faster. self.car.fuel_spent = 0.0 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward if self.tile_visited_count == len( self.track) or self._env_step_counter >= MAX_STEPS: done = True x, y = self.car.hull.position if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: done = True step_reward = -100 if self.saver is not None: self.saver.step(self._observation, action, step_reward, done, self.getGroundTruth()) if self.srl_model != "raw_pixels": return self.getSRLState(self._observation), step_reward, done, {} return np.array(self._observation), step_reward, done, {} # Copied for the original Gym Racing Car env, it is modified to be able to remove the render window. def render(self, mode='human'): if self.viewer is None: self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) self.viewer.window.set_visible(self._renders) self.score_label = pyglet.text.Label('0000', font_size=36, x=20, y=WINDOW_H * 2.5 / 40.00, anchor_x='left', anchor_y='center', color=(255, 255, 255, 255)) self.transform = rendering.Transform() if "t" not in self.__dict__: return # reset() not called yet zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min( self.t, 1) # Animate zoom first second scroll_x = self.car.hull.position[0] scroll_y = self.car.hull.position[1] angle = -self.car.hull.angle vel = self.car.hull.linearVelocity if np.linalg.norm(vel) > 0.5: angle = math.atan2(vel[0], vel[1]) self.transform.set_scale(zoom, zoom) self.transform.set_translation( WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)), WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle))) self.transform.set_rotation(angle) self.car.draw(self.viewer, mode != "state_pixels") arr = None win = self.viewer.window if mode != 'state_pixels': win.switch_to() win.dispatch_events() if mode == "rgb_array" or mode == "state_pixels": win.clear() t = self.transform if mode == 'rgb_array': VP_W = VIDEO_W VP_H = VIDEO_H else: VP_W = STATE_W VP_H = STATE_H gl.glViewport(0, 0, VP_W, VP_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() t.disable() self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find why 2x needed, wtf image_data = pyglet.image.get_buffer_manager().get_color_buffer( ).get_image_data() arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') arr = arr.reshape(VP_H, VP_W, 4) arr = arr[::-1, :, 0:3] # agent can call or not call env.render() itself when recording video. if mode == "rgb_array" and not self.human_render: win.flip() if mode == 'human': self.human_render = True win.clear() t = self.transform gl.glViewport(0, 0, WINDOW_W, WINDOW_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() t.disable() self.render_indicators(WINDOW_W, WINDOW_H) win.flip() self.viewer.onetime_geoms = [] return arr
class RLBenchDSEnv(): """ wrapped_env is the rlbench env, passed in initialized. This class just calls actions on that env. """ def __init__(self, name="RLBenchDS-unset", wrapped_env=None, max_steps_per_epoch=100, path=None, **_kwargs): #Passed in in dataset_generator.py kwargs. self.env = wrapped_env self.max_steps_per_epoch = max_steps_per_epoch print("Recording data...") try: env_name = self.env.task.get_name() except: env_name = self.env.unwrapped.spec.id print(env_name) self.saver = EpisodeSaver(name, env_name=env_name, path=path) self.action_space = self.env.action_space def seed(self, seed=None): """ Seed random generator :param seed: (int) :return: ([int]) """ self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action, t=0): """ :action: (int) :return: (observation, int reward, bool done, dict extras) """ self.action = action self.observation, reward, done, _ = self.env.step(action) ground_truth, obs = self.observation['state'], self.observation[ 'observation'] done = done or t == self.max_steps_per_epoch self.saver.step(obs, action, reward, done, ground_truth) return self.observation, reward, done, {} @staticmethod def getGroundTruthDim(): """ :return: (int) """ return None def getGroundTruth(self): """ Don't need this method :return: (numpy array) """ return None def reset(self): """ Reset the environment :return: (numpy ndarray) first observation of the env """ self.observation = self.env.reset() ground_truth, obs = self.observation['state'], self.observation[ 'observation'] if self.saver is not None: self.saver.reset(obs, ground_truth) return self.observation def render(self, mode='rgb_array'): """ :param mode: (str) :return: (numpy array) BGR image """ return None