class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 22
        self.action_space = 4
        # We assume that a ROS node has already been created before initialising the environment

        self.running_step = rospy.get_param("/running_step")

        self.done_reward = rospy.get_param("/collision_reward")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="")

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()
        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # close the controller
        # self.controllers_object.shut_down_controller()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()
        # 4th: probably add the function to change the target position is the target is reached

        # reset the controllers
        self.reset_controller_pub.publish(Bool(True))
        # self.controllers_object.reset_mrm_joint_controllers()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        rospy.logdebug("get_observations...")
        observation = self.mrm_state_object.get_observations()

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, joint_targets):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(joint_targets)

        # Then we send the command to the robot and let it go for running_step seconds
        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()

        return observation, reward, done, reach_target, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_joints_init(self, diameter):
        joints_inits = {
            80: [0.28, -0.78, -0.7, 0],
            90: [0.3, -0.8, -0.75, 0],
            100: [0.4, -1.05, -0.9, 0],
            110: [0.51, -1.27, -0.95, 0],
            120: [0.53, -1.29, -0.98, 0]
        }
        self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]

    def get_joints_init(self):
        return self.mrm_joint_pubisher_object.init_pos
Ejemplo n.º 2
0
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 22
        self.action_space = 4
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.running_step = rospy.get_param("/running_step")

        self.done_reward = rospy.get_param("/collision_reward")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="")

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()
        # 4th: probably add the function to change the target position is the target is reached
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        rospy.logdebug("get_observations...")
        observation = self.mrm_state_object.get_observations()

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, joint_targets):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(joint_targets)

        start = time.time()
        # Then we send the command to the robot and let it go for running_step seconds
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()
        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()

        if end - start > 1.6:  # if the real time spent on one step is too long, it means the robot is heavily stuck in the pipe, we should discard this episode
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.22, 0.23]

        b = 0.07 + random.uniform(0.0, 0.018)

        dis_target = np.asarray(
            random.uniform(dis_target_left[0], dis_target_left[1]))

        dis_target += (-0.018 + (angle - 60) / 10 * 0.006)

        target_pos = [
            0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin(
                (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin(
                    (angle - 90.0) / 180 * pi) - b * cos(
                        (angle - 90.0) / 180 * pi)
        ]

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/high_fidelity_exp/src/mrm_training/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos

    def change_joints_init(self, diameter):
        joints_inits = {
            80: [0.28, -0.78, -0.7, 0],
            90: [0.3, -0.8, -0.75, 0],
            100: [0.4, -1.05, -0.9, 0],
            110: [0.51, -1.27, -0.95, 0],
            120: [0.51, -1.27, -0.95, 0]
        }
        self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]
Ejemplo n.º 3
0
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        # self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # reset the controllers
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, action):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(action)

        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        # laser_data = observation[8:18]
        # self.mini_laser.append(min(laser_data))

        if end - start > 1.6:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.22, 0.23]

        b = 0.07 + random.uniform(0.0, 0.018)

        dis_target = np.asarray(
            random.uniform(dis_target_left[0], dis_target_left[1]))

        dis_target += (-0.018 + (angle - 60) / 10 * 0.006)

        target_pos = [
            0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin(
                (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin(
                    (angle - 90.0) / 180 * pi) - b * cos(
                        (angle - 90.0) / 180 * pi)
        ]
        # add some noise to vertical position
        # target_pos[1] -= random.uniform(0.04, 0.07)

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos
class MrmEnv(gym.Env):

    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4         # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.desired_target.x = rospy.get_param("/desired_target_point/x")
        self.desired_target.y = rospy.get_param("/desired_target_point/y")
        self.desired_target.z = rospy.get_param("/desired_target_point/z")
        self.running_step = rospy.get_param("/running_step")


        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState( weight_r1=self.weight_r1,
                                          weight_r2=self.weight_r2,
                                          weight_r3=self.weight_r3)

        self.mrm_state_object.set_desired_target_point(self.desired_target.x,
                                                          self.desired_target.y,
                                                          self.desired_target.z)

        self.mrm_joint_pubisher_object = JointPub()

        self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        self._seed()

        # list for recording the minimal value from the laser
        self.mini_laser = []

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

         # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

         # reset the controllers
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()   # this step is very important!!!! the system will be easily failed if it is removed

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []   # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend([0.0,0.0,0.0,0.0])  # assume the previous action is 0 for the first step

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, action):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(action)
        # Then we send the command to the robot and let it go for running_step seconds
        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)   # add previous action to the state

        if end - start > 1.0:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.desired_target.x = rospy.get_param("/desired_target_point/x")
        self.desired_target.y = rospy.get_param("/desired_target_point/y")
        self.desired_target.z = rospy.get_param("/desired_target_point/z")
        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_state_object.set_desired_target_point(self.desired_target.x,
                                                       self.desired_target.y,
                                                       self.desired_target.z)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.passed_episode = 0
        self.total_episode = rospy.get_param("/total_episode")

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.obstacle_penalty = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        self.passed_episode += 1

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # change the weights of rewards to enable curriculum learning
        self.weight_r3 **= self.inr_r3
        #self.weight_r3 = 1 - np.exp(-90 / (self.total_episode - self.passed_episode))

        self.mrm_state_object.change_weights(weight_r1=self.weight_r1,
                                             weight_r2=self.weight_r2,
                                             weight_r3=self.weight_r3)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        return observation

    def _step(self, action):
        # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while
        # gazebo is paused
        self.mrm_joint_pubisher_object.apply_joints_effort(
            action, self.running_step)
        # unpasue the simulation and start applying the effort
        self.gazebo.unpauseSim()
        rospy.sleep(self.running_step)
        # pause the simelation again to process data
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        laser_data = observation[8:18]
        self.mini_laser.append(min(laser_data))
        return observation, reward, done, reach_target, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])