class ReacherEnv(object): def __init__(self): self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() self.agent = Panda() self.agent.set_control_loop_enabled(False) self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') self.agent_ee_tip = self.agent.get_tip() self.initial_joint_positions = self.agent.get_joint_positions() def _get_state(self): # Return state containing arm joint angles/velocities & target position return np.concatenate([self.agent.get_joint_positions(), self.agent.get_joint_velocities(), self.target.get_position()]) def reset(self): # Get a random position within a cuboid and set the target position pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) self.agent.set_joint_positions(self.initial_joint_positions) return self._get_state() def step(self, action): self.agent.set_joint_target_velocities(action) # Execute action on arm self.pr.step() # Step the physics simulation ax, ay, az = self.agent_ee_tip.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2) return reward, self._get_state() def shutdown(self): self.pr.stop() self.pr.shutdown()
class Env: def __init__(self, cfg): self.enabled_joints = cfg.agent.enabled_joints self._launch(cfg.env.env_path, cfg.env.headless) self._setup_robot() self._setup_vision("Vision_sensor") self._setup_actions(cfg.agent.n_discrete_actions, cfg.agent.vel_min, cfg.agent.vel_max) self._setup_debug_cameras("debug_vis1", "debug_vis2") self._setup_target() self._setup_distractor() self._setup_table() def _setup_distractor(self): self.distractor = Shape('distractor') self.hide_distractor() def _setup_table(self): self.table = Shape("diningTable") self.table_near = False self.table_initial_pos = self.table.get_position() def _setup_target(self): self.target = Shape('target') def _setup_robot(self): self.robot = Panda() self.joint_init = self.robot.get_joint_positions() self.robot.set_control_loop_enabled(False) self.robot.set_motor_locked_at_zero_velocity(True) self.tip = self.robot.get_tip() def _setup_vision(self, vis_name): self.vision = VisionSensor(vis_name) def _launch(self, path, headless): self.pr = PyRep() self.pr.launch(path, headless=headless) self.pr.start() def _setup_actions(self, n_discrete_actions, vel_min, vel_max): self.poss_actions = np.linspace(vel_min, vel_max, n_discrete_actions) def _convert_action(self, action): return self.poss_actions[action] def _setup_debug_cameras(self, name0, name1): self.vis_debug0 = VisionSensor(name0) self.vis_debug1 = VisionSensor(name1) def step(self, action): converted_action = self._convert_action(action) action = np.zeros(7) action[self.enabled_joints] = converted_action self.robot.set_joint_target_velocities(action) self.pr.step() reward = self._calculate_reward() rgb = self.vision.capture_rgb() reward = 0 done = False # todo: change to include more meaningful info return rgb, reward, done, {} def show_distractor(self): self.distractor.set_position([0.15, 0., 1.]) def hide_distractor(self): self.distractor.set_position([-1.5, 0., 1.]) def toggle_table(self): if self.table_near: self._move_table_far() else: self._move_table_near() def _move_table_near(self): print("moving table near") pos = self.table_initial_pos pos[0] -= 1 self.table.set_position(pos) def _move_table_far(self): self.table.set_position(self.table_initial_pos) def _calculate_reward(self): ax, ay, az = self.tip.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target reward = -np.sqrt((ax - tx)**2 + (ay - ty)**2 + (az - tz)**2) return reward def reset(self): self.robot.set_joint_positions(self.joint_init) rgb = self.vision.capture_rgb() return rgb def get_debug_images(self): debug0 = self.vis_debug0.capture_rgb() debug1 = self.vis_debug1.capture_rgb() return debug0, debug1 def get_joint_positions(self): pos = np.array(self.robot.get_joint_positions()) pos = pos[self.enabled_joints] return pos
from pyrep.robots.end_effectors.baxter_gripper import BaxterGripper from pyrep.robots.end_effectors.panda_gripper import PandaGripper # SCENE_FILE = join(dirname(abspath(__file__)), 'scene_ur3_reinforcement_learning_env.ttt') SCENE_FILE = join(dirname(abspath(__file__)), 'scene_reinforcement_learning_env.ttt') pr = PyRep() # Launch the application with a scene file that contains a robot pr.launch(SCENE_FILE, headless=False) pr.start() # Start the simulation arm = Panda() gripper = PandaGripper() arm.set_control_loop_enabled(False) # arm.set_motor_locked_at_zero_velocity(True) for i in range(300): arm.set_joint_target_velocities( list(np.random.uniform(-0.5, 0.5, size=(7, )))) pr.step() for i in range(100): gripper_state = random.random() done = False # Open the gripper to gripper_state (1 is open 0 is closed) at a velocity of 0.04. while not done: done = gripper.actuate(gripper_state, velocity=0.04) pr.step()
class Environment(object): def __init__(self, not_render): super(Environment, self).__init__() self.controller = PyRep() scene_file = join(dirname(abspath(__file__)),'scene/scene_reinforcement_learning_env.ttt') self.controller.launch(scene_file, headless=not_render) self.actuator = Panda() self.actuator.set_control_loop_enabled(False) self.actuator.set_motor_locked_at_zero_velocity(True) self.top_camera = VisionSensor('Vision_TOP') self.side_camera = VisionSensor('Vision_SIDE') self.front_camera = VisionSensor('Vision_FRONT') self.target = Shape('target') self.target_initial_pos = self.target.get_position() self.start_sim() self.actuator_tip = self.actuator.get_tip() self.actuator_initial_position = self.actuator.get_joint_positions() self.POS_MIN = [0.8, -0.2, 1.0] self.POS_MAX = [1.0, 0.2, 1.4] def get_image(self, camera): view = (camera.capture_rgb()*255).round().astype(np.uint8) view = np.asarray(I.fromarray(view)) return cv.cvtColor(view, cv.COLOR_BGR2GRAY) def get_state(self): positions = self.actuator.get_joint_positions() velocities = self.actuator.get_joint_velocities() target_position = self.target.get_position() images = (self.get_image(self.top_camera), self.get_image(self.side_camera), self.get_image(self.front_camera)) return (positions, velocities, target_position, images) def done(self): done = [self.inside_range( self.target.get_position()[i] - RANGE_DISCOUNT, self.target.get_position()[i] + RANGE_DISCOUNT, self.actuator.get_tip().get_position()[i]) for i in range(3)] return done[0] == True and (done[0]==done[1] and done[0]==done[2]) def inside_range(self, min, max, x): return True if (min <= x and x <= max) else False def reset_scene(self): #new_target_pos = list(np.random.uniform(self.POS_MIN, self.POS_MAX)) self.target.set_position(self.target_initial_pos) self.actuator.set_joint_positions(self.actuator_initial_position) return self.get_state() def do_step(self, action, model_string): self.actuator.set_joint_target_velocities(action) self.controller.step() if model_string == "base": return self.base_article_reward(), self.get_state() else: return self.get_reward(), self.get_state() def base_article_reward(self): if(self.done()): rw = BASE_REWARD else: ax, ay, az = self.actuator_tip.get_position() tx, ty, tz = self.target.get_position() rw = math.e * (-0.25 * np.sqrt((ax-tx)**2+(ay-ty)**2+(az-tz)**2)) return rw def get_reward(self): ax, ay, az = self.actuator_tip.get_position() tx, ty, tz = self.target.get_position() rw = -np.sqrt((ax-tx)**2+(ay-ty)**2+(az-tz)**2) return rw def shutdown(self): return (self.controller.stop()) and (self.controller.shutdown()) def start_sim(self): return self.controller.start() def stop_sim(self): return self.controller.stop()