예제 #1
0
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        avg_laser = 0.0

        # average the recorded minimal distance from the end-effector to the pipe
        if len(self.mini_laser) > 0:
            avg_laser = np.mean(self.mini_laser)
            self.mini_laser = []

        return observation, avg_laser

    def _step(self, action):
        # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while
        # gazebo is paused
        self.mrm_joint_pubisher_object.apply_joints_effort(
            action, self.running_step)
        # unpasue the simulation and start applying the effort
        self.gazebo.unpauseSim()

        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()
        # pause the simelation again to process data
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        laser_data = observation[8:18]
        self.mini_laser.append(min(laser_data))

        if end - start > 1.0:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.23, 0.24]
        dis_target = random.uniform(dis_target_left[0], dis_target_left[1])
        target_pos = [
            0.08 + dis_target * cos((angle - 90.0) / 180 * pi),
            -0.35 - dis_target * sin((angle - 90.0) / 180 * pi)
        ]
        # add some noise to vertical position
        target_pos[1] -= random.uniform(0.04, 0.07)

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.desired_target.x = rospy.get_param("/desired_target_point/x")
        self.desired_target.y = rospy.get_param("/desired_target_point/y")
        self.desired_target.z = rospy.get_param("/desired_target_point/z")
        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_state_object.set_desired_target_point(self.desired_target.x,
                                                       self.desired_target.y,
                                                       self.desired_target.z)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.passed_episode = 0
        self.total_episode = rospy.get_param("/total_episode")

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.obstacle_penalty = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        self.passed_episode += 1

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # change the weights of rewards to enable curriculum learning
        self.weight_r3 **= self.inr_r3
        #self.weight_r3 = 1 - np.exp(-90 / (self.total_episode - self.passed_episode))

        self.mrm_state_object.change_weights(weight_r1=self.weight_r1,
                                             weight_r2=self.weight_r2,
                                             weight_r3=self.weight_r3)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        return observation

    def _step(self, action):
        # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while
        # gazebo is paused
        self.mrm_joint_pubisher_object.apply_joints_effort(
            action, self.running_step)
        # unpasue the simulation and start applying the effort
        self.gazebo.unpauseSim()
        rospy.sleep(self.running_step)
        # pause the simelation again to process data
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        laser_data = observation[8:18]
        self.mini_laser.append(min(laser_data))
        return observation, reward, done, reach_target, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])