예제 #1
0
class MonopedEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.joint_increment_value = rospy.get_param("/joint_increment_value")
        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")
        self.desired_force = rospy.get_param("/desired_force")
        self.desired_yaw = rospy.get_param("/desired_yaw")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.weight_r4 = rospy.get_param("/weight_r4")
        self.weight_r5 = rospy.get_param("/weight_r5")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="monoped")

        self.monoped_state_object = MonopedState(
            max_height=self.max_height,
            min_height=self.min_height,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            joint_increment_value=self.joint_increment_value,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward,
            desired_force=self.desired_force,
            desired_yaw=self.desired_yaw,
            weight_r1=self.weight_r1,
            weight_r2=self.weight_r2,
            weight_r3=self.weight_r3,
            weight_r4=self.weight_r4,
            weight_r5=self.weight_r5)

        self.monoped_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z)

        self.monoped_joint_pubisher_object = JointPub()
        """
        For this version, we consider 6 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5-6) Increment/Decrement kfe_joint
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_monoped_joint_controllers...")
        self.controllers_object.reset_monoped_joint_controllers()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose...")
        self.monoped_joint_pubisher_object.set_init_pose()

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.monoped_state_object.check_all_systems_ready()
        rospy.logdebug("get_observations...")
        observation = self.monoped_state_object.get_observations()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented
        next_action_position = self.monoped_state_object.get_action_to_position(
            action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        self.monoped_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.monoped_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.monoped_state_object.process_data()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state, reward, done, {}

    def get_state(self, observation):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        return self.monoped_state_object.get_state_as_string(observation)
        time.sleep(wait_time * 10)

        rospy.loginfo("END CHECKING SENSOR DATA")
        debug_object.check_all_systems_ready()
        rospy.loginfo("CLOCK BEFORE RESET")
        debug_object.get_clock_time()

        rospy.loginfo("SETTING INITIAL POSE TO AVOID")
        debug_object.set_init_pose()
        time.sleep(wait_time * 2.0)
        rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA")
        debug_object.check_all_systems_ready()
        rospy.loginfo(
            "We deactivate gravity to check any reasidual effect of reseting the simulation"
        )
        gazebo.change_gravity(0.0, 0.0, 0.0)

        rospy.loginfo("RESETING SIMULATION")
        gazebo.pauseSim()
        gazebo.resetSim()
        gazebo.unpauseSim()
        rospy.loginfo("CLOCK AFTER RESET")
        debug_object.get_clock_time()

        rospy.loginfo(
            "RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
        controllers_object.reset_cartpole_joint_controllers()
        rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
        debug_object.check_all_systems_ready()
        rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
        debug_object.get_clock_time()
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        # self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # reset the controllers
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        self.reset_controller_pub.publish(Bool(False))

        avg_laser = 0.0
        # average the recorded minimal distance from the end-effector to the pipe
        if len(self.mini_laser) > 0:
            avg_laser = np.mean(self.mini_laser)
            self.mini_laser = []

        return observation, avg_laser

    def _step(self, action):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(action)

        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        laser_data = observation[8:18]
        self.mini_laser.append(min(laser_data))

        if end - start > 1.0:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.23, 0.26]
        dis_target = random.uniform(dis_target_left[0], dis_target_left[1])
        target_pos = [
            0.06 + dis_target * cos((angle - 90.0) / 180 * pi),
            -0.34 - dis_target * sin((angle - 90.0) / 180 * pi)
        ]
        # add some noise to vertical position
        target_pos[1] -= random.uniform(0.04, 0.07)

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.desired_target.x = rospy.get_param("/desired_target_point/x")
        self.desired_target.y = rospy.get_param("/desired_target_point/y")
        self.desired_target.z = rospy.get_param("/desired_target_point/z")
        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_state_object.set_desired_target_point(self.desired_target.x,
                                                       self.desired_target.y,
                                                       self.desired_target.z)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.passed_episode = 0
        self.total_episode = rospy.get_param("/total_episode")

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.obstacle_penalty = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        self.passed_episode += 1

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # change the weights of rewards to enable curriculum learning
        self.weight_r3 **= self.inr_r3
        #self.weight_r3 = 1 - np.exp(-90 / (self.total_episode - self.passed_episode))

        self.mrm_state_object.change_weights(weight_r1=self.weight_r1,
                                             weight_r2=self.weight_r2,
                                             weight_r3=self.weight_r3)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        return observation

    def _step(self, action):
        # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while
        # gazebo is paused
        self.mrm_joint_pubisher_object.apply_joints_effort(
            action, self.running_step)
        # unpasue the simulation and start applying the effort
        self.gazebo.unpauseSim()
        rospy.sleep(self.running_step)
        # pause the simelation again to process data
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        laser_data = observation[8:18]
        self.mini_laser.append(min(laser_data))
        return observation, reward, done, reach_target, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])
class MonopedEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.joint_increment_value = rospy.get_param("/joint_increment_value")
        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")
        self.desired_force = rospy.get_param("/desired_force")
        self.desired_yaw = rospy.get_param("/desired_yaw")

        self.list_of_observations = rospy.get_param("/list_of_observations")

        haa_max = rospy.get_param("/joint_limits_array/haa_max")
        haa_min = rospy.get_param("/joint_limits_array/haa_min")
        hfe_max = rospy.get_param("/joint_limits_array/hfe_max")
        hfe_min = rospy.get_param("/joint_limits_array/hfe_min")
        kfe_max = rospy.get_param("/joint_limits_array/kfe_max")
        kfe_min = rospy.get_param("/joint_limits_array/kfe_min")
        self.joint_limits = {
            "haa_max": haa_max,
            "haa_min": haa_min,
            "hfe_max": hfe_max,
            "hfe_min": hfe_min,
            "kfe_max": kfe_max,
            "kfe_min": kfe_min
        }

        self.discrete_division = rospy.get_param("/discrete_division")

        self.maximum_base_linear_acceleration = rospy.get_param(
            "/maximum_base_linear_acceleration")
        self.maximum_base_angular_velocity = rospy.get_param(
            "/maximum_base_angular_velocity")
        self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.weight_r4 = rospy.get_param("/weight_r4")
        self.weight_r5 = rospy.get_param("/weight_r5")

        haa_init_value = rospy.get_param("/init_joint_pose/haa")
        hfe_init_value = rospy.get_param("/init_joint_pose/hfe")
        kfe_init_value = rospy.get_param("/init_joint_pose/kfe")
        self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value]

        # Fill in the Done Episode Criteria list
        self.episode_done_criteria = rospy.get_param("/episode_done_criteria")

        # Jump Increment Value in Radians
        self.jump_increment = rospy.get_param("/jump_increment")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="monoped")

        self.monoped_state_object = MonopedState(
            max_height=self.max_height,
            min_height=self.min_height,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            joint_increment_value=self.joint_increment_value,
            list_of_observations=self.list_of_observations,
            joint_limits=self.joint_limits,
            episode_done_criteria=self.episode_done_criteria,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward,
            desired_force=self.desired_force,
            desired_yaw=self.desired_yaw,
            weight_r1=self.weight_r1,
            weight_r2=self.weight_r2,
            weight_r3=self.weight_r3,
            weight_r4=self.weight_r4,
            weight_r5=self.weight_r5,
            discrete_division=self.discrete_division,
            maximum_base_linear_acceleration=self.
            maximum_base_linear_acceleration,
            maximum_base_angular_velocity=self.maximum_base_angular_velocity,
            maximum_joint_effort=self.maximum_joint_effort,
            jump_increment=self.jump_increment)

        self.monoped_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z)

        self.monoped_joint_pubisher_object = JointPub()
        """
        For this version, we consider 5 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5) Dont Move
        6) Perform One Jump
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_monoped_joint_controllers...")
        self.controllers_object.reset_monoped_joint_controllers()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose init variable...>>>" +
                       str(self.init_joint_pose))
        # We save that position as the current joint desired position
        init_pos = self.monoped_state_object.init_joints_pose(
            self.init_joint_pose)

        # 4th: We Set the init pose to the jump topic so that the jump control can update
        rospy.logdebug("Publish init_pose for Jump Control...>>>" +
                       str(init_pos))
        # We check the jump publisher has connection
        self.monoped_joint_pubisher_object.check_publishers_connection()
        # We move the joints to position, no jump
        do_jump = False
        self.monoped_joint_pubisher_object.move_joints_jump(init_pos, do_jump)

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.monoped_state_object.check_all_systems_ready()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # 8th: Get the State Discrete Stringuified version of the observations
        rospy.logdebug("get_observations...")
        observation = self.monoped_state_object.get_observations()
        state = self.get_state(observation)

        return state

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponds to which joint is incremented
        next_action_position, do_jump = self.monoped_state_object.get_action_to_position(
            action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        self.monoped_joint_pubisher_object.move_joints_jump(
            next_action_position, do_jump)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.monoped_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.monoped_state_object.process_data()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state, reward, done, {}

    def get_state(self, observation):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        return self.monoped_state_object.get_state_as_string(observation)
class multi_UAV_Env(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment
        self.set_link = rospy.ServiceProxy('/gazebo/set_link_state',
                                           SetLinkState)
        rospy.wait_for_service('/gazebo/set_link_state')
        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param(
            "/desired_pose/position/x")
        self.desired_pose.position.y = rospy.get_param(
            "/desired_pose/position/y")
        self.desired_pose.position.z = rospy.get_param(
            "/desired_pose/position/z")
        self.desired_pose.orientation.x = rospy.get_param(
            "/desired_pose/orientation/x")
        self.desired_pose.orientation.y = rospy.get_param(
            "/desired_pose/orientation/y")
        self.desired_pose.orientation.z = rospy.get_param(
            "/desired_pose/orientation/z")

        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_vel = rospy.get_param("/max_vel")
        self.min_vel = rospy.get_param("/min_vel")
        self.max_acc = rospy.get_param("/max_acc")
        self.min_acc = rospy.get_param("/min_acc")
        self.max_jerk = rospy.get_param("/max_jerk")
        self.min_jerk = rospy.get_param("/min_jerk")
        self.max_snap = rospy.get_param("/max_snap")
        self.min_snap = rospy.get_param("/min_snap")

        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")

        self.num_action_space = 15

        high = np.array([1. for _ in range(self.num_action_space)])
        low = np.array([-1. for _ in range(self.num_action_space)])
        self.action_space = spaces.Box(low=low, high=high)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="monoped")

        self.multi_uav_state_object = MultiUavState(
            max_vel=self.max_vel,
            min_vel=self.min_vel,
            max_acc=self.max_acc,
            min_acc=self.min_acc,
            max_jerk=self.max_jerk,
            min_jerk=self.min_jerk,
            max_snap=self.max_snap,
            min_snap=self.min_snap,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward)

        self.multi_uav_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z, self.desired_pose.orientation.x,
            self.desired_pose.orientation.y, self.desired_pose.orientation.z)

        self.mellingers = [
            MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35),
            MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35)
        ]
        """
        For this version, we consider 6 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5-6) Increment/Decrement kfe_joint
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        print("end of init...")

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the Simulator
        print("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        print("Reset SIM...")
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()

        # model_name = "four_quad_payload"
        # rospy.wait_for_service('/gazebo/delete_model')
        # del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel)
        # del_model_prox(model_name)

        # model_xml = rospy.get_param("robot_description")
        # pose = Pose()
        # pose.orientation.w = 1.0

        # spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        # req = SpawnModelRequest()
        # req.model_name = model_name
        # req.model_xml = model_xml
        # # should this be unique so ros_control can use each individually?
        # req.robot_namespace = "/foo"
        # req.initial_pose = pose
        # resp = spawn_model(req)

        self.gazebo.resetWorld()

        # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch')

        # initial_pose = Pose()
        # initial_pose.position.x = 0
        # initial_pose.position.y = 0
        # initial_pose.position.z = 0

        # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro')
        # urdf = f.read()
        # rospy.wait_for_service('gazebo/spawn_urdf_model')
        # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel)
        # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world")

        # for i in range(4):
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_imu_name = 'hummingbird_' + str(i) + '/imugt_link'
        #     self.set_link(LinkState(link_name=link_imu_name))
        #     for j in range(4):
        #         rospy.wait_for_service('/gazebo/set_link_state')
        #         link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j)
        #         print link_name
        #         self.set_link(LinkState(link_name=link_name))
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link'
        #     self.set_link(LinkState(link_name=link_odom_name))

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        print("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.8)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        # rospy.logdebug("reset_monoped_joint_controllers...")
        # self.controllers_object.reset_monoped_joint_controllers()

        # 3rd: resets the robot to initial conditions
        # print("set_init_pose...")
        # for controller in self.mellingers:
        #     motor_speed = np.zeros((4, 1))
        #     controller.set_desired_motor_speed(motor_speed)
        # controller.send_motor_command()

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        print("check_all_systems_ready...")
        self.multi_uav_state_object.check_all_systems_ready()
        # rospy.logdebug("get_observations...")
        # observation = self.multi_uav_state_object.get_states()

        # 6th: We restore the gravity to original
        print("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        print("Pause SIM...")
        self.gazebo.pauseSim()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state()

        return state

    def _step(self, actions):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented

        # next_action_position = self.multi_uav_state_object.get_action_to_position(action)

        # update the reference in the mellinger controller from the action
        for i, action in enumerate(actions):
            self.mellingers[i].set_ref_from_action(action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        # action to the gazebo environment
        for i_mellinger in self.mellingers:
            i_mellinger.publish_err()
            i_mellinger.update_current_state()
            i_mellinger.update_desired_values()
            i_mellinger.motorSpeedFromU()
            i_mellinger.send_motor_command()

        # self.monoped_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        # observation = self.multi_uav_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        # reward,done = self.multi_uav_state_object.process_data()

        reward, done = self.multi_uav_state_object.calculate_reward_payload_orientation(
        )

        # Get the State Discrete Stringuified version of the observations
        # state = self.get_state(observation)
        state = self.get_state()

        return state, reward, done

    def get_state(self):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        # return self.multi_uav_state_object.get_state_as_string(observation)

        return self.multi_uav_state_object.get_states()