# Assumed imports for this example (PyRep and NumPy); SCENE_FILE, POS_MIN and
# POS_MAX are module-level constants defined elsewhere in the source.
import numpy as np
from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape


class ReacherEnv(object):

    def __init__(self):
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = Panda()
        self.agent.set_control_loop_enabled(False)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')
        self.agent_ee_tip = self.agent.get_tip()
        self.initial_joint_positions = self.agent.get_joint_positions()

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        return np.concatenate([self.agent.get_joint_positions(),
                               self.agent.get_joint_velocities(),
                               self.target.get_position()])

    def reset(self):
        # Get a random position within a cuboid and set the target position
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.agent.set_joint_positions(self.initial_joint_positions)
        return self._get_state()

    def step(self, action):
        self.agent.set_joint_target_velocities(action)  # Execute action on arm
        self.pr.step()  # Step the physics simulation
        ax, ay, az = self.agent_ee_tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2)
        return reward, self._get_state()

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()
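
A minimal usage sketch for ReacherEnv (not part of the original example), assuming SCENE_FILE points at a valid reacher scene; it drives the seven Panda joints with random target velocities for a couple of episodes:

env = ReacherEnv()
for episode in range(2):
    state = env.reset()
    for _ in range(100):
        # Random target velocity for each of the Panda's 7 joints
        action = np.random.uniform(-1.0, 1.0, size=7)
        reward, state = env.step(action)
env.shutdown()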
Example #2
# Assumed imports for this example (PyRep and NumPy); cfg is expected to be
# an attribute-style config object (e.g. a Hydra/OmegaConf config).
import numpy as np
from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape
from pyrep.objects.vision_sensor import VisionSensor


class Env:
    def __init__(self, cfg):
        self.enabled_joints = cfg.agent.enabled_joints
        self._launch(cfg.env.env_path, cfg.env.headless)
        self._setup_robot()
        self._setup_vision("Vision_sensor")
        self._setup_actions(cfg.agent.n_discrete_actions, cfg.agent.vel_min,
                            cfg.agent.vel_max)
        self._setup_debug_cameras("debug_vis1", "debug_vis2")
        self._setup_target()
        self._setup_distractor()
        self._setup_table()

    def _setup_distractor(self):
        self.distractor = Shape('distractor')
        self.hide_distractor()

    def _setup_table(self):
        self.table = Shape("diningTable")
        self.table_near = False
        self.table_initial_pos = self.table.get_position()

    def _setup_target(self):
        self.target = Shape('target')

    def _setup_robot(self):
        self.robot = Panda()
        self.joint_init = self.robot.get_joint_positions()
        self.robot.set_control_loop_enabled(False)
        self.robot.set_motor_locked_at_zero_velocity(True)
        self.tip = self.robot.get_tip()

    def _setup_vision(self, vis_name):
        self.vision = VisionSensor(vis_name)

    def _launch(self, path, headless):
        self.pr = PyRep()
        self.pr.launch(path, headless=headless)
        self.pr.start()

    def _setup_actions(self, n_discrete_actions, vel_min, vel_max):
        self.poss_actions = np.linspace(vel_min, vel_max, n_discrete_actions)

    def _convert_action(self, action):
        return self.poss_actions[action]

    def _setup_debug_cameras(self, name0, name1):
        self.vis_debug0 = VisionSensor(name0)
        self.vis_debug1 = VisionSensor(name1)

    def step(self, action):
        converted_action = self._convert_action(action)
        action = np.zeros(7)
        action[self.enabled_joints] = converted_action
        self.robot.set_joint_target_velocities(action)
        self.pr.step()
        reward = self._calculate_reward()
        rgb = self.vision.capture_rgb()
        done = False

        # todo: change to include more meaningful info
        return rgb, reward, done, {}

    def show_distractor(self):
        self.distractor.set_position([0.15, 0., 1.])

    def hide_distractor(self):
        self.distractor.set_position([-1.5, 0., 1.])

    def toggle_table(self):
        if self.table_near:
            self._move_table_far()
        else:
            self._move_table_near()

    def _move_table_near(self):
        print("moving table near")
        # Copy so we don't mutate the stored initial position in place
        pos = list(self.table_initial_pos)
        pos[0] -= 1
        self.table.set_position(pos)

    def _move_table_far(self):
        self.table.set_position(self.table_initial_pos)

    def _calculate_reward(self):
        ax, ay, az = self.tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        reward = -np.sqrt((ax - tx)**2 + (ay - ty)**2 + (az - tz)**2)

        return reward

    def reset(self):
        self.robot.set_joint_positions(self.joint_init)
        rgb = self.vision.capture_rgb()

        return rgb

    def get_debug_images(self):
        debug0 = self.vis_debug0.capture_rgb()
        debug1 = self.vis_debug1.capture_rgb()

        return debug0, debug1

    def get_joint_positions(self):
        pos = np.array(self.robot.get_joint_positions())
        pos = pos[self.enabled_joints]

        return pos
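
A hedged usage sketch for Env; the class expects an attribute-style config, so the SimpleNamespace below, its field values, and the scene path are illustrative assumptions:

from types import SimpleNamespace

cfg = SimpleNamespace(
    env=SimpleNamespace(env_path='scene.ttt', headless=True),  # placeholder scene path
    agent=SimpleNamespace(enabled_joints=[0, 1, 2],            # joints the agent may drive
                          n_discrete_actions=5,
                          vel_min=-1.0, vel_max=1.0),
)
env = Env(cfg)
rgb = env.reset()
for _ in range(10):
    # One discrete velocity index per enabled joint
    action = np.random.randint(cfg.agent.n_discrete_actions,
                               size=len(cfg.agent.enabled_joints))
    rgb, reward, done, info = env.step(action)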
Example #3
# Assumed imports for this example; RANGE_DISCOUNT and BASE_REWARD are
# module-level constants defined elsewhere in the source.
import math
from os.path import join, dirname, abspath

import cv2 as cv
import numpy as np
from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape
from pyrep.objects.vision_sensor import VisionSensor


class Environment(object):

    def __init__(self, not_render):
        super(Environment, self).__init__()
        self.controller = PyRep()
        scene_file = join(dirname(abspath(__file__)),
                          'scene/scene_reinforcement_learning_env.ttt')
        self.controller.launch(scene_file, headless=not_render)

        self.actuator = Panda()
        self.actuator.set_control_loop_enabled(False)
        self.actuator.set_motor_locked_at_zero_velocity(True)

        self.top_camera = VisionSensor('Vision_TOP')
        self.side_camera = VisionSensor('Vision_SIDE')
        self.front_camera = VisionSensor('Vision_FRONT')

        self.target = Shape('target')
        self.target_initial_pos = self.target.get_position()
        self.start_sim()
        self.actuator_tip = self.actuator.get_tip()
        self.actuator_initial_position = self.actuator.get_joint_positions()

        self.POS_MIN = [0.8, -0.2, 1.0]
        self.POS_MAX = [1.0, 0.2, 1.4]

    def get_image(self, camera):
        # capture_rgb() returns floats in [0, 1]; scale to uint8 first
        view = (camera.capture_rgb() * 255).round().astype(np.uint8)
        # The capture is RGB, so use the RGB (not BGR) grayscale conversion
        return cv.cvtColor(view, cv.COLOR_RGB2GRAY)

    def get_state(self):
        positions = self.actuator.get_joint_positions()
        velocities = self.actuator.get_joint_velocities()
        target_position = self.target.get_position()
        images = (self.get_image(self.top_camera),
                  self.get_image(self.side_camera),
                  self.get_image(self.front_camera))
        return (positions, velocities, target_position, images)


    def done(self):
        # Done when the tip lies within RANGE_DISCOUNT of the target on all
        # three axes
        target_pos = self.target.get_position()
        tip_pos = self.actuator_tip.get_position()
        return all(self.inside_range(target_pos[i] - RANGE_DISCOUNT,
                                     target_pos[i] + RANGE_DISCOUNT,
                                     tip_pos[i])
                   for i in range(3))

    def inside_range(self, lo, hi, x):
        return lo <= x <= hi

    def reset_scene(self):
        #new_target_pos = list(np.random.uniform(self.POS_MIN, self.POS_MAX))
        self.target.set_position(self.target_initial_pos)
        self.actuator.set_joint_positions(self.actuator_initial_position)
        return self.get_state()

    def do_step(self, action, model_string):
        self.actuator.set_joint_target_velocities(action)
        self.controller.step()

        if model_string == "base":
            return self.base_article_reward(), self.get_state()
        return self.get_reward(), self.get_state()


    def base_article_reward(self):
        if self.done():
            rw = BASE_REWARD
        else:
            ax, ay, az = self.actuator_tip.get_position()
            tx, ty, tz = self.target.get_position()
            # Exponential distance shaping, e^(-0.25 * distance)
            rw = math.e ** (-0.25 * np.sqrt((ax - tx)**2 + (ay - ty)**2 + (az - tz)**2))

        return rw

    def get_reward(self):
        ax, ay, az = self.actuator_tip.get_position()
        tx, ty, tz = self.target.get_position()
        rw = -np.sqrt((ax-tx)**2+(ay-ty)**2+(az-tz)**2)
        return rw

    def shutdown(self):
        # `stop() and shutdown()` would short-circuit because stop() returns
        # None, so shutdown() must be called explicitly
        self.controller.stop()
        self.controller.shutdown()

    def start_sim(self):
        return self.controller.start()

    def stop_sim(self):
        return self.controller.stop()
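
A minimal usage sketch for Environment (illustrative only, assuming the bundled scene file is present); it runs one episode of random joint velocities under the "base" reward and stops early once done() reports the tip is within range of the target:

env = Environment(not_render=True)
state = env.reset_scene()
for _ in range(200):
    action = np.random.uniform(-1.0, 1.0, size=7)
    reward, state = env.do_step(action, "base")
    if env.done():
        break
env.shutdown()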