예제 #1
0
class RosGazeboEnv(gym.Env):
    def __init__(self,
                 ros_pkg_name,
                 launch_file,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION"):

        # To reset Simulations
        rospy.logdebug("START init RosGazeboEnv")
        self.gazebo = GazeboConnection(start_init_physics_parameters,
                                       reset_world_or_sim)
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        self.reward_pub = rospy.Publisher('/openai/reward',
                                          RLExperimentInfo,
                                          queue_size=1)

        self.ros_launcher = ROSLauncher(rospackage_name=ros_pkg_name,\
                            launch_file_name=launch_file)
        self.gazebo.unpauseSim()
        self._setup_subscribers()
        self._setup_publishers()
        self._check_all_systems_ready()
        self.gazebo.pauseSim()

        rospy.logdebug("END init RosGazeboEnv and Paused Sim")

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve 
        the observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, 
        execute the action in the simulation and get the observations 
        result of performing that action.
        """
        rospy.logdebug("START STEP OpenAIROS")

        self.gazebo.unpauseSim()
        self._set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self.cumulated_episode_reward += reward

        rospy.logdebug("END STEP OpenAIROS")

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting RosGazeboEnvironment")
        self._reset_sim()
        self._init_env_variables()
        self._update_episode()
        obs = self._get_obs()
        rospy.logdebug("END Reseting RosGazeboEnvironment")
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RosGazeboEnvironment")
        rospy.signal_shutdown("Closing RosGazeboEnvironment")

    def _update_episode(self):
        """
        Publishes the cumulated reward of the episode and
        increases the episode number by one.
        :return:
        """
        rospy.logwarn("PUBLISHING REWARD...")
        self._publish_reward_topic(self.cumulated_episode_reward,
                                   self.episode_num)
        rospy.logwarn("PUBLISHING REWARD...DONE=" +
                      str(self.cumulated_episode_reward) + ",EP=" +
                      str(self.episode_num))

        self.episode_num += 1
        self.cumulated_episode_reward = 0

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Resets a simulation
        """
        rospy.logdebug("RESET SIM START")
        self.gazebo.unpauseSim()
        self._check_all_systems_ready()
        self._set_init_pose()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        self._check_all_systems_ready()
        self.gazebo.pauseSim()
        rospy.logdebug("RESET SIM END")
        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _check_all_systems_ready(self):
        """
        Checks that all the subscribers, publishers, services and other 
        simulation systems are operational.
        """
        self._check_all_subscribers_ready()
        self._check_all_publishers_ready()
        return True

    def _check_all_subscribers_ready(self):
        """
        Checks that all the subscribers are ready for connection
        """
        raise NotImplementedError()

    def _check_all_publishers_ready(self):
        """
        Checks that all the sensors are ready for connection
        """
        raise NotImplementedError()

    def _setup_subscribers(self):
        """
        Sets up all the subscribers relating to robot state
        """
        raise NotImplementedError()

    def _setup_publishers(self):
        """
        Sets up all the publishers relating to robot state
        """
        raise NotImplementedError()

    def _check_subscriber_ready(self, name, type, timeout=5.0):
        """
        Waits for a sensor topic to get ready for connection
        """
        var = None
        while var is None and not rospy.is_shutdown():
            try:
                var = rospy.wait_for_message(name, type, timeout)
            except:
                rospy.logfatal(
                    'Sensor topic "%s" is not available. Waiting...', name)
        return var

    def _check_publisher_ready(self, name, obj, timeout=5.0):
        """
        Waits for a publisher to get response
        """
        start_time = rospy.Time.now()
        while obj.get_num_connections() == 0 and not rospy.is_shutdown():
            if (rospy.Time.now() - start_time).to_sec() >= timeout:
                rospy.logfatal('No subscriber found for publisher %s. Exiting',
                               name)

    def _check_service_ready(self, name, timeout=5.0):
        """
        Waits for a service to get ready
        """
        try:
            rospy.wait_for_service(name, timeout)
        except (rospy.ServiceException, rospy.ROSException):
            rospy.logfatal("Service %s unavailable.", name)

    def _get_obs(self):
        """
        Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset 
        at the start of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """
        Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """
        Indicates whether or not the episode is done ( the robot has 
        fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """
        Calculates the reward to give based on the observations 
        given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """
        Initial configuration of the environment. Can be used to 
        configure initial state and extract information from the
        simulation.
        """
        raise NotImplementedError()
예제 #2
0
class MovingBobcatEnv(gym.Env):
    STATE_SIZE = 2
    ACTION_SIZE = 3

    def __init__(self):

        self.number_actions = rospy.get_param('/moving_cube/n_actions')
        self.field_division = rospy.get_param("/moving_cube/field_division")
        self.seed()
        self.viewer = None

        self.action_space = spaces.Box(0.0,
                                       0.5, (self.ACTION_SIZE, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(-np.inf,
                                            +np.inf,
                                            shape=(self.STATE_SIZE, ),
                                            dtype=np.float32)

        # get configuration parameters
        self.init_roll_vel = rospy.get_param('/moving_cube/init_roll_vel')
        self.get_link_state = rospy.ServiceProxy("/gazebo/get_link_state",
                                                 GetLinkState)

        # Actions
        self.roll_speed_fixed_value = rospy.get_param(
            '/moving_cube/roll_speed_fixed_value')
        self.roll_speed_increment_value = rospy.get_param(
            '/moving_cube/roll_speed_increment_value')

        self.start_point = Point()
        self.start_point.x = rospy.get_param("/moving_cube/init_cube_pose/x")
        self.start_point.y = rospy.get_param("/moving_cube/init_cube_pose/y")
        self.start_point.z = rospy.get_param("/moving_cube/init_cube_pose/z")
        self.hyd = 0
        self.bucket_vel = 0
        self.depth = 0
        self.m = (2.1213) / (1.9213 + 0.2)  # the slope of the pile
        self.density = 1922  # density of material in kg/m^3
        self.z_collision = 0
        self.last_volume = 0
        self.volume_sum = 0
        self.last_z_pile = 0
        self.last_x_step = 0
        self.last_z_collision = 0
        self.tip_position = Point()
        self.last_reward = 0
        self.pitch = 0
        self.counter = 0
        self.c = 0
        self.flag = 0
        self.max_volume = 850  # max bucket operation load for tool

        # Done
        self.max_pitch_angle = rospy.get_param('/moving_cube/max_pitch_angle')

        # Rewards
        self.move_distance_reward_weight = rospy.get_param(
            "/moving_cube/move_distance_reward_weight")
        self.y_linear_speed_reward_weight = rospy.get_param(
            "/moving_cube/y_linear_speed_reward_weight")
        self.y_axis_angle_reward_weight = rospy.get_param(
            "/moving_cube/y_axis_angle_reward_weight")
        self.end_episode_points = rospy.get_param(
            "/moving_cube/end_episode_points")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.gazebo.unpauseSim()
        self.check_all_sensors_ready()
        self.pub_bucket_tip = rospy.Publisher('/bobcat/tip_position',
                                              Point,
                                              queue_size=10)

        rospy.Subscriber('/bobcat/arm/hydraulics', Float64, self.get_hyd)
        rospy.Subscriber('/WPD/Speed', TwistStamped, self.get_vel)
        rospy.Subscriber('/bobcat/arm/loader', Float64, self.get_bucket_vel)
        rospy.Subscriber("/Bobby/joint_states", JointState,
                         self.joints_callback)
        rospy.Subscriber("/Bobby/odom", Odometry, self.odom_callback)
        rospy.Subscriber('/robot_bumper', ContactsState, self.get_depth)
        rospy.Subscriber('/Bobby/imu', Imu, self.get_angular_vel)
        rospy.Subscriber("/gazebo/link_states", LinkStates,
                         self.cb_link_states)

        self.pub = rospy.Publisher('/WPD/Speed', TwistStamped, queue_size=10)
        self.pub2 = rospy.Publisher('/bobcat/arm/hydraulics',
                                    Float64,
                                    queue_size=10)
        self.pub3 = rospy.Publisher('/bobcat/arm/loader',
                                    Float64,
                                    queue_size=10)
        self.pub4 = rospy.Publisher('/volume', Float32, queue_size=10)
        self.pub5 = rospy.Publisher('/penetration_z', Float32, queue_size=10)

        self.gazebo.pauseSim()
        self.command = TwistStamped()
        self.node_process = Popen(
            shlex.split('rosrun robil_lihi C_response_loader.py'))
        self.node_process.terminate()

    def cb_link_states(self, msg):
        self.link_states = msg
        self.loader_index = self.link_states.name.index("Bobby::loader")
        self.loader_pos = self.link_states.pose[self.loader_index]

    def get_angular_vel(self, msg):
        self.angular_vel = msg.angular_velocity.y
        orientation_q = msg.orientation
        orientation_list = [
            orientation_q.x, orientation_q.y, orientation_q.z, orientation_q.w
        ]
        (self.roll, self.pitch,
         self.yaw) = euler_from_quaternion(orientation_list)

    def plot_x(self):
        rospy.wait_for_service('/gazebo/get_link_state')
        loader_pos = self.get_link_state('Bobby::loader',
                                         'world').link_state.pose
        x = np.linspace(-0.2, 1.5, 100)
        y = self.m * x + 0.2
        plt.plot(x, y, color='goldenrod')
        if self.counter % 20 == 0:
            plt.plot(loader_pos.position.x + 0.96 * math.cos(self.pitch),
                     loader_pos.position.z + 0.96 * math.sin(self.pitch), 'bo')
            plt.title("Bucket position")
            plt.ylabel("z axis")
            plt.xlabel("x axis")
            plt.axis([-1, 1.5, 0, 1.5])
            plt.axis('equal')
            plt.draw()
            plt.pause(0.00000000001)

        self.counter += 1

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        self.gazebo.unpauseSim()
        now = rospy.get_rostime()
        action[0] = 0.25 * action[0] + 0.25
        action[1] = 0.25 * action[1] + 0.25
        action[2] = 0.25 * action[2] + 0.25
        self.set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs, now)
        reward = self.compute_reward(obs, done, now)
        simplified_obs = self.convert_obs_to_state(obs)

        self.plot_x()  # plot graph of bucket tip position
        plt.ion()
        plt.show()
        return np.array(simplified_obs, dtype=np.float32), reward, done, {}

    def reset(self):
        self.gazebo.unpauseSim()
        self.check_all_sensors_ready()
        self.z_collision = 0
        self.depth = 0
        self.command.twist.linear.x = 0
        self.pub.publish(self.command)
        self.pub2.publish(0)
        self.pub3.publish(0)
        self.node_process.terminate()
        self.set_init_pose()
        obs = self._get_obs()
        now = rospy.get_rostime()
        self._is_done(obs, now)

        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.node_process = Popen(
            shlex.split('rosrun robil_lihi C_response_loader.py'))
        self.gazebo.unpauseSim()
        self.command.twist.linear.x = 0
        self.pub.publish(self.command)
        self.pub2.publish(0)
        self.pub3.publish(0)
        self.set_init_pose()

        self.init_env_variables()
        self.check_all_sensors_ready()
        rospy.sleep(1)
        self.gazebo.pauseSim()
        self.init_env_variables()
        obs = self._get_obs()
        simplified_obs = self.convert_obs_to_state(obs)
        plt.clf()
        return simplified_obs

    def render(self, mode='human', close=False):
        pass

    def init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset at the start
        of an episode.
        :return:
        """
        self.depth = 0.0
        self.last_volume = 0.0
        self.total_distance_moved = 0.0
        self.volume_sum = 0
        self.last_z_pile = 0
        self.last_x_step = 0
        self.last_z_collision = 0
        self.z_collision = 0
        self.last_reward = 0
        self.pitch = 0
        self.flag = 0

    def _is_done(self, observations, now):

        if self.max_volume - 5 < self.volume_sum or now.secs > 15 or (
                self.depth < 0.001
                and self.volume_sum > 5):  # term to restart the episode
            done = True
        else:
            done = False

        return done

    def set_action(self, action):
        'execute the action choosen in the gazebo environment'
        command = TwistStamped()
        command.twist.linear.x = action[0]
        self.pub.publish(command)
        self.pub2.publish(action[1])
        self.pub3.publish(action[2])

    def _get_obs(self):
        """
        Here we define what sensor data defines our robots observations
        To know which Variables we have acces to, we need to read the
        MyCubeSingleDiskEnv API DOCS
        :return:
        """

        bucket_observations = [
            self.loader_pos.position.x + 0.96 * math.cos(self.pitch),
            self.loader_pos.position.z + 0.96 * math.sin(self.pitch)
        ]
        return bucket_observations

    def get_orientation_euler(self):
        # We convert from quaternions to euler
        orientation_list = [
            self.odom.pose.pose.orientation.x,
            self.odom.pose.pose.orientation.y,
            self.odom.pose.pose.orientation.z,
            self.odom.pose.pose.orientation.w
        ]

        roll, pitch, yaw = euler_from_quaternion(orientation_list)
        return roll, pitch, yaw

    def compute_reward(self, observations, done, now):

        if not done:
            reward = -10
            self.calc_volume()
            self.pub4.publish(self.volume_sum)
            if self.volume_sum > 0 and self.depth <= 0.01 and self.flag == 0:
                self.flag = 1
                self.pub5.publish(self.loader_pos.position.z +
                                  0.96 * math.sin(self.pitch))
            if self.volume_sum > self.max_volume:  # failed to take the bucket out, too many soil
                reward -= self.volume_sum
                rospy.logwarn("############### Fail=>" + str(self.volume_sum))
            elif self.volume_sum > 0.01:
                reward += 0.3 * self.volume_sum
            if self.volume_sum == 0:
                reward = reward - 10 * (self.loader_pos.position.z +
                                        0.96 * math.sin(self.pitch))
        else:  # The episode didn't success so we need to give a big negative reward
            reward = 0
            self.depth = 0
        return reward

    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data
        roll, pitch, yaw = self.get_orientation_euler()

    def check_all_sensors_ready(self):
        self.check_joint_states_ready()
        self.check_odom_ready()
        rospy.logdebug("ALL SENSORS READY")

    def get_hyd(self, data):  # subscriber for arm vel
        self.hyd = data.data

    def get_bucket_vel(self, data):  # subscriber for bucket vel
        self.bucket_vel = data.data

    def get_vel(self, msg):  # subscriber for bobcat velocity
        self.bobcat_vel = msg.twist.linear.x

    def check_joint_states_ready(self):
        self.joints = None
        while self.joints is None and not rospy.is_shutdown():
            try:
                self.joints = rospy.wait_for_message("/Bobby/joint_states",
                                                     JointState,
                                                     timeout=1.0)
                rospy.logdebug("Current Bobby/joint_states READY=>" +
                               str(self.joints))

            except:
                rospy.logerr(
                    "Current Bobby/joint_states not ready yet, retrying for getting Bobby"
                )
        return self.joints

    def check_odom_ready(self):
        self.odom = None
        while self.odom is None and not rospy.is_shutdown():
            try:
                self.odom = rospy.wait_for_message("/Bobby/odom",
                                                   Odometry,
                                                   timeout=1.0)
                rospy.logdebug("Current /Bobby/odom READY=>" + str(self.odom))

            except:
                rospy.logerr(
                    "Current /Bobby/odom not ready yet, retrying for getting odom"
                )

        return self.odom

    def set_init_pose(self):
        """Sets the Robot in its init pose
        """
        now = rospy.get_rostime()
        while not rospy.is_shutdown():
            self.command.twist.linear.x = 0
            self.pub.publish(self.command)
            while self.odom.pose.pose.position.z > 0.13:
                if now.secs > 45:
                    break
                self.pub2.publish(-0.5)
            while self.odom.pose.pose.position.z < 0.03:
                if now.secs > 45:
                    break
                self.pub2.publish(0.5)
            self.pub2.publish(0)

            roll, pitch, yaw = self.get_orientation_euler()
            while pitch > 0.0:
                if now.secs > 45:
                    break
                self.pub3.publish(0.3)
                roll, pitch, yaw = self.get_orientation_euler()
            while pitch < -0.05:
                if now.secs > 45:
                    break
                self.pub3.publish(-0.3)
                roll, pitch, yaw = self.get_orientation_euler()
            self.pub3.publish(0)
            break

    def go_up(self):
        """get the bucket out of soil
        """
        self.gazebo.unpauseSim()  # must in order to get the bucket up
        self.check_all_sensors_ready()

        self.command.twist.linear.x = 0
        while not rospy.is_shutdown():
            self.command.twist.linear.x = 0
            self.pub.publish(self.command)
            self.pub2.publish(0)
            self.pub3.publish(0)
            print("pitch:", self.pitch)
            print("z:", self.loader_pos.position.z)
            while self.pitch > -0.45:
                self.pub3.publish(0.3)
            self.pub3.publish(0)
            while self.loader_pos.position.z < 1.0:
                self.pub2.publish(0.2)
            self.pub2.publish(0)
            print("total volume:", self.volume_sum)
            self.plot_x()  # plot graph of bucket tip position
            plt.ion()
            plt.show()
            break

    def convert_obs_to_state(self, observations):
        """
        Converts the observations used for reward and so on to the essentials for the robot state
        In this case we only need the orientation of the cube and the speed of the disc.
        The distance doesnt condition at all the actions
        """
        BobcatPos_x = observations[0]
        BobcatPos_z = observations[1]

        state_converted = [BobcatPos_x, BobcatPos_z]

        return state_converted

    def get_depth(self, data):
        if (ContactsState.states != []):
            i = 0
            for i in range(len(data.states)):
                if ('box' in data.states[i].collision2_name) or (
                        'box' in data.states[i].collision1_name
                ):  # check that the string exist in collision2_name/1
                    self.depth = np.mean(data.states[i].depths)
                    self.z_collision = np.mean(
                        data.states[i].contact_positions[0].z)

    def calc_volume(self):
        z_pile = self.m * (
            self.loader_pos.position.x + 0.96 * math.cos(self.pitch) + 0.2
        )  # 0.96 is the distance between center mass of the loader to the end
        H = z_pile - self.z_collision
        x = self.loader_pos.position.x + 0.96 * math.cos(self.pitch)
        z = self.z_collision
        big_trapezoid = (x - self.last_x_step) * (z_pile +
                                                  self.last_z_pile) / 2
        small_trapezoid = (x - self.last_x_step) * (self.z_collision +
                                                    self.last_z_collision) / 2
        volume = (big_trapezoid - small_trapezoid
                  ) * 1.612 * self.density  # 1.66 is the tool width
        if z_pile > 0 and z > 0 and z_pile > z:
            self.volume_sum = self.volume_sum + volume
        self.last_z_pile = z_pile
        self.last_x_step = x
        self.last_z_collision = self.z_collision
class CartPole3DEnv(gym.Env): #class inherit from gym.Env.

    def __init__(self): # We initialize and define init function.
        number_actions = rospy.get_param('/cart_pole_3d/n_actions')
        self.action_space = spaces.Discrete(number_actions)
        self.state_size = 3

        self._seed()

        # get configuration parameters
        self.init_cart_vel = rospy.get_param('/cart_pole_3d/init_cart_vel')

        # Actions
        self.cart_speed_fixed_value = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value')

        self.start_point = Point()
        self.start_point.x = rospy.get_param("/cart_pole_3d/init_cube_pose/x")
        self.start_point.y = rospy.get_param("/cart_pole_3d/init_cube_pose/y")
        self.start_point.z = rospy.get_param("/cart_pole_3d/init_cube_pose/z")

        # Done
        self.max_angle = rospy.get_param('/cart_pole_3d/max_angle')
        self.max_distance = rospy.get_param('/cart_pole_3d/max_distance')

        # Rewards
        self.keep_pole_up_reward = rospy.get_param("/cart_pole_3d/keep_pole_up_reward")
        self.end_episode_points = rospy.get_param("/cart_pole_3d/end_episode_points")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_list = ['joint_state_controller',
                                 'cart_joint_velocity_controller'
                                 ]
        self.controllers_object = ControllersConnection(namespace="cart_pole_3d",
                                                        controllers_list=self.controllers_list)
        """
        Namespace of robot is the namespace in front of your controller.
        Controllers_list => that we want to reset at each time that we call the reset controllers.
          This case we just have two: joint_state_controller, inertia_wheel_roll_joint_velocity_controller
        We need to create a function which gets retrieves those controllers.
        """

        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()

        rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback)
        rospy.Subscriber("/cart_pole_3d/odom", Odometry, self.odom_callback)
        # rospy.Subscriber("/cart_pole_3d/imu", Imu, self.imu_callback)

        self._cart_velocity_publisher = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command',
                                             Float64, queue_size=1)

        self.check_publishers_connection()
        self.gazebo.pauseSim()

    def _seed(self, seed=None):  # overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):  # overriden function

        self.gazebo.unpauseSim()
        self.set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self.compute_reward(obs, done)
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs, reward, done, info

    def _reset(self):  # We are using a virtual function defined in the gym infrastructure.

        """
        Everytime we start episode, robot will be the same place and configurations. Because otherwise it won't
        learn correctly and we can't iterated. => basic stuff for Reinforcement learning.
        :return:
        """
        self.gazebo.unpauseSim()
        """
        why we need to unpauseSim because resetting controllers and for checking the sensors, we need the simulation
        to be running because otherwise we don't have any sensory data and we don't have access to the controller reset
        functions services they won't work and tell you to hit play. => it is very important.
        """
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.set_init_pose()
        #initialized robot
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.gazebo.pauseSim()
        self.init_env_variables()
        obs = self._get_obs()
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs

    def init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset at the start
        of an episode.
        :return:
        """
        self.total_distance_moved = 0.0
        self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point)
        self.cart_current_speed = rospy.get_param('/cart_pole_3d/init_cart_vel')

    def _is_done(self, observations):

        # MAximum distance to travel permited in meters from origin
        max_upper_distance = self.max_distance
        max_lower_distance = -self.max_distance
        max_angle_allowed = self.max_angle

        if (observations[1] > max_upper_distance or observations[1] < max_lower_distance):
            rospy.logerr("Cart Too Far==>" + str(observations[1]))
            done = True
        elif(observations[2] > max_angle_allowed or observations[2] < -max_angle_allowed):
            rospy.logerr("Pole excceed allowed angle =>" + str(observations[2]))
            done = True
        else:
            # rospy.loginfo("Cart NOT Too Far==>" + str(observations[1]))
            # rospy.loginfo("Pole still in range==>" + str(observations[2]))
            done = False

        return done

    def set_action(self, action):

        # We convert the actions to speed movements to send to the parent class CubeSingleDiskEnv
        if action == 0:  # Move left
            self.cart_current_speed = self.cart_speed_fixed_value
        elif action == 1:  # Move right
            self.cart_current_speed = -1*self.cart_speed_fixed_value

        # We clamp Values to maximum
        rospy.logdebug("cart_current_speed before clamp==" + str(self.cart_current_speed))
        self.cart_current_speed = numpy.clip(self.cart_current_speed,
                                          -1 * self.cart_speed_fixed_value,
                                          self.cart_speed_fixed_value)
        rospy.logdebug("cart_current_speed after clamp==" + str(self.cart_current_speed))

        self.move_joints(self.cart_current_speed)

    def _get_obs(self):
        """
        Here we define what sensor data defines our robots observations
        To know which Variables we have acces to, we need to read the
        MyCubeSingleDiskEnv API DOCS
        :return:
        """

        # We get the angle of the pole_angle
        pole_angle = round(self.joints.position[1],3)

        # We get the distance from the origin
        y_distance = self.get_y_dir_distance_from_start_point(self.start_point)

        # We get the current speed of the cart
        current_cart_speed_vel = self.get_cart_velocity()

        cube_observations = [
            round(current_cart_speed_vel, 1),
            round(y_distance, 1),
            round(pole_angle, 1)
        ]

        return cube_observations

    def get_cart_velocity(self):
        # We get the current joint roll velocity
        roll_vel = self.joints.velocity[1]
        return roll_vel

    def get_y_linear_speed(self):
        # We get the current joint roll velocity
        y_linear_speed = self.odom.twist.twist.linear.y
        return y_linear_speed

    def get_y_dir_distance_from_start_point(self, start_point):
        """
        Calculates the distance from the given point and the current position
        given by odometry. In this case the increase or decrease in y.
        :param start_point:
        :return:
        """
        y_dist_dir = self.odom.pose.pose.position.y - start_point.y

        return y_dist_dir

    def get_pole_angle(self):
        return self.imu.orientation.y

    def compute_reward(self, observations, done):
        speed = observations[0]
        distance = observations[1]
        angle = observations[2]

        # if not done:
        #     # positive for keeping the pole up
        #     reward_keep_pole_up = self.keep_pole_up_reward
        #     # nigative Reinforcement
        #     reward_distance = distance * -2
        #
        #     # positive reward on angle
        #     reward_angle = 10 / ((abs(angle) * 1)+1.1)
        #
        #     reward = reward_angle + reward_distance + reward_keep_pole_up
        #
        #     rospy.loginfo("Reward_distance=" + str(reward_distance))
        #     rospy.loginfo("Reward_angle= " + str(reward_angle))
        # else:
        #     reward = -1 * self.end_episode_points
        #
        # return reward
        # rospy.loginfo("pole_angle for reward==>" + str(angle))
        delta = 0.7 - abs(angle)
        reward_pole_angle = math.exp(delta*10)

        # If we are moving to the left and the pole is falling left is Bad
        # rospy.logwarn("pole_vel==>" + str(speed))
        pole_vel_sign = numpy.sign(speed)
        pole_angle_sign = numpy.sign(angle)
        # rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign))
        # rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign))

        # We want inverted signs for the speeds. We multiply by -1 to make minus positive.
        # global_sign + = GOOD, global_sign - = BAD
        base_reward = 500
        if speed != 0:
            global_sign = pole_angle_sign * pole_vel_sign * -1
            reward_for_efective_movement = base_reward * global_sign
        else:
            # Is a particular case. If it doesnt move then its good also
            reward_for_efective_movement = base_reward

        reward = reward_pole_angle + reward_for_efective_movement

        # rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement))
        return reward

    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data


    def check_all_sensors_ready(self):
        self.check_joint_states_ready()
        self.check_odom_ready()
        # self.check_imu_ready()
        rospy.logdebug("ALL SENSORS READY")

    def check_joint_states_ready(self):
        self.joints = None
        while self.joints is None and not rospy.is_shutdown():
            try:
                self.joints = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                # check response from this topic for 1 second. if don't received respone, it mean not ready.
                # Assure data channels are working perfectly.
                rospy.logdebug("Current cart_pole_3d/joint_states READY=>" + str(self.joints))

            except:
                rospy.logerr("Current cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")
        return self.joints

    def check_odom_ready(self):
        self.odom = None
        while self.odom is None and not rospy.is_shutdown():
            try:
                self.odom = rospy.wait_for_message("/cart_pole_3d/odom", Odometry, timeout=1.0)
                rospy.logdebug("Current /cart_pole_3d/odom READY=>" + str(self.odom))

            except:
                rospy.logerr("Current /cart_pole_3d/odom not ready yet, retrying for getting odom")

        return self.odom

    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._cart_velocity_publisher.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.logdebug("No susbribers to _cart_velocity_publisher yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.logdebug("_base_pub Publisher Connected")

        rospy.logdebug("All Publishers READY")

    def move_joints(self, cart_speed):
        joint_speed_value = Float64()
        joint_speed_value.data = cart_speed
        # rospy.logwarn("cart Velocity >>>>>>>" + str(joint_speed_value))
        self._cart_velocity_publisher.publish(joint_speed_value)

    def set_init_pose(self):
        """Sets the Robot in its init pose
        """
        self.move_joints(self.init_cart_vel)

        return True

    def convert_obs_to_state(self, observations):
        """
        Converts the observations used for reward and so on to the essentials for the robot state
        In this case we only need the orientation of the cube and the speed of the disc.
        The distance doesnt condition at all the actions
        """
        speed = observations[0]
        distance = observations[1]
        angle = observations[2]

        state_converted = [speed, distance, angle]

        return state_converted
def position_reset_test():
    """
    Test of position accuracy and reset system.
    """
    position = 0.0
    position = float(sys.argv[1])

    rospy.init_node('debug_test_node', anonymous=True, log_level=rospy.WARN)
    rospy.logwarn("[position=" + str(position) + "]")
    wait_time = 0.1
    controllers_object = ControllersConnection(namespace="cartpole_v0")

    debug_object = MoveCartClass()

    gazebo = GazeboConnection()
    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()
    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("START CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("SET init pose...")
    debug_object.set_init_pose()
    rospy.loginfo("WAIT FOR GOING TO INIT POSE")
    time.sleep(wait_time)

    raw_input("Start Movement...PRESS KEY")
    i = 0
    wait_times_m = []
    while not rospy.is_shutdown():
        pos_x = [position]
        debug_object.move_joints(pos_x)
        delta_time = debug_object.wait_until_base_is_in_pos(position)
        wait_times_m = numpy.append(wait_times_m, [delta_time])
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_pos(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])
        i += 1
        if i > 10:
            average_time = numpy.average(wait_times_m)
            rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]")
            break

    rospy.loginfo("END CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK BEFORE RESET")
    debug_object.get_clock_time()

    # Reset Sim
    raw_input("END Start Movement...PRESS KEY")
    rospy.loginfo("SETTING INITIAL POSE TO AVOID")
    debug_object.set_init_pose()
    time.sleep(wait_time * 2.0)
    rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo(
        "We deactivate gravity to check any reasidual effect of reseting the simulation"
    )
    gazebo.change_gravity(0.0, 0.0, 0.0)

    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()

    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("We reactivating gravity...")
    gazebo.change_gravity(0.0, 0.0, -9.81)
    rospy.loginfo("END")
예제 #5
0
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv):
    def __init__(self):
        rospy.logdebug("Starting URSimDoorOpening Class object...")

        # Init GAZEBO Objects
        self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState)
        self.get_world_state = rospy.ServiceProxy('/gazebo/get_world_properties', GetWorldProperties)

        # Subscribe joint state and target pose
        rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback)
        rospy.Subscriber("/joint_states", JointState, self.joints_state_callback)
        rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback)

        # Gets training parameters from param server
        self.desired_pose = Pose()
        self.running_step = rospy.get_param("/running_step")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.observations = rospy.get_param("/observations")
        
        # Joint limitation
        shp_max = rospy.get_param("/joint_limits_array/shp_max")
        shp_min = rospy.get_param("/joint_limits_array/shp_min")
        shl_max = rospy.get_param("/joint_limits_array/shl_max")
        shl_min = rospy.get_param("/joint_limits_array/shl_min")
        elb_max = rospy.get_param("/joint_limits_array/elb_max")
        elb_min = rospy.get_param("/joint_limits_array/elb_min")
        wr1_max = rospy.get_param("/joint_limits_array/wr1_max")
        wr1_min = rospy.get_param("/joint_limits_array/wr1_min")
        wr2_max = rospy.get_param("/joint_limits_array/wr2_max")
        wr2_min = rospy.get_param("/joint_limits_array/wr2_min")
        wr3_max = rospy.get_param("/joint_limits_array/wr3_max")
        wr3_min = rospy.get_param("/joint_limits_array/wr3_min")
        self.joint_limits = {"shp_max": shp_max,
                             "shp_min": shp_min,
                             "shl_max": shl_max,
                             "shl_min": shl_min,
                             "elb_max": elb_max,
                             "elb_min": elb_min,
                             "wr1_max": wr1_max,
                             "wr1_min": wr1_min,
                             "wr2_max": wr2_max,
                             "wr2_min": wr2_min,
                             "wr3_max": wr3_max,
                             "wr3_min": wr3_min
                             }

        shp_init_value0 = rospy.get_param("/init_joint_pose0/shp")
        shl_init_value0 = rospy.get_param("/init_joint_pose0/shl")
        elb_init_value0 = rospy.get_param("/init_joint_pose0/elb")
        wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1")
        wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2")
        wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3")
        self.init_joint_pose0 = [shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0]

        shp_init_value1 = rospy.get_param("/init_joint_pose1/shp")
        shl_init_value1 = rospy.get_param("/init_joint_pose1/shl")
        elb_init_value1 = rospy.get_param("/init_joint_pose1/elb")
        wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1")
        wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2")
        wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3")
        self.init_joint_pose1 = [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1]
#	print("[init_joint_pose1]: ", [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1])

        shp_init_value2 = rospy.get_param("/init_joint_pose2/shp")
        shl_init_value2 = rospy.get_param("/init_joint_pose2/shl")
        elb_init_value2 = rospy.get_param("/init_joint_pose2/elb")
        wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1")
        wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2")
        wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3")
        self.init_joint_pose2 = [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2]
#	print("[init_joint_pose2]: ", [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2])

        shp_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shp")
        shl_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shl")
        elb_after_rotate = rospy.get_param("/eelink_pose_after_rotate/elb")
        wr1_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr1")
        wr2_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr2")
        wr3_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr3")
        self.after_rotate = [shp_after_rotate, shl_after_rotate, elb_after_rotate, wr1_after_rotate, wr2_after_rotate, wr3_after_rotate]

        shp_after_pull = rospy.get_param("/eelink_pose_after_pull/shp")
        shl_after_pull = rospy.get_param("/eelink_pose_after_pull/shl")
        elb_after_pull = rospy.get_param("/eelink_pose_after_pull/elb")
        wr1_after_pull = rospy.get_param("/eelink_pose_after_pull/wr1")
        wr2_after_pull = rospy.get_param("/eelink_pose_after_pull/wr2")
        wr3_after_pull = rospy.get_param("/eelink_pose_after_pull/wr3")
        self.after_pull = [shp_after_pull, shl_after_pull, elb_after_pull, wr1_after_pull, wr2_after_pull, wr3_after_pull]

        r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive")
        l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive")
        r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower")
        l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower")
        r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring")
        l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring")

        r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive")
        l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive")
        r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower")
        l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower")
        r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring")
        l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring")

        self.init_grp_pose1 = [r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1]
        self.init_grp_pose2 = [r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2]

        # Fill in the Done Episode Criteria list
        self.episode_done_criteria = rospy.get_param("/episode_done_criteria")
        
        # stablishes connection with simulator
        self._gz_conn = GazeboConnection()
        self._ctrl_conn = ControllersConnection(namespace="")
        
        # Controller type for ros_control
        self._ctrl_type =  rospy.get_param("/control_type")
        self.pre_ctrl_type =  self._ctrl_type

	# Get the force and troque limit
        self.force_limit = rospy.get_param("/force_limit")
        self.torque_limit = rospy.get_param("/torque_limit")

        # We init the observations
        self.base_orientation = Quaternion()
        self.imu_link = Quaternion()
        self.door = Quaternion()
        self.quat = Quaternion()
        self.imu_link_rpy = Vector3()
        self.door_rpy = Vector3()
        self.link_state = LinkStates()
        self.wrench_stamped = WrenchStamped()
        self.joints_state = JointState()
        self.end_effector = Point() 
        self.previous_action =[]
        self.counter = 0
        self.max_rewards = 1

        # Arm/Control parameters
        self._ik_params = setups['UR5_6dof']['ik_params']
        
        # ROS msg type
        self._joint_pubisher = JointPub()
        self._joint_traj_pubisher = JointTrajPub()

        # Gym interface and action
        self.action_space = spaces.Discrete(6)
        self.observation_space = 21 #np.arange(self.get_observations().shape[0])
##        self.observation_space = 15 #np.arange(self.get_observations().shape[0])
        self.reward_range = (-np.inf, np.inf)
        self._seed()

        # Change the controller type 
        set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl)
        set_joint_traj_pos_server = rospy.Service('/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl)
        set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl)
        set_joint_traj_vel_server = rospy.Service('/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl)
#        set_gripper_server = rospy.Service('/set_gripper_controller', SetBool, self._set_grp_ctrl)

        self.pos_traj_controller = ['joint_state_controller',
                            'gripper_controller',
                            'pos_traj_controller']
        self.pos_controller = ["joint_state_controller",
                                "gripper_controller",
                                "ur_shoulder_pan_pos_controller",
                                "ur_shoulder_lift_pos_controller",
                                "ur_elbow_pos_controller",
                                "ur_wrist_1_pos_controller",
                                "ur_wrist_2_pos_controller",
                                "ur_wrist_3_pos_controller"]
        self.vel_traj_controller = ['joint_state_controller',
                            'gripper_controller',
                            'vel_traj_controller']
        self.vel_controller = ["joint_state_controller",
                                "gripper_controller",
                                "ur_shoulder_pan_vel_controller",
                                "ur_shoulder_lift_vel_controller",
                                "ur_elbow_vel_controller",
                                "ur_wrist_1_vel_controller",
                                "ur_wrist_2_vel_controller",
                                "ur_wrist_3_vel_controller"]

        # Helpful False
        self.stop_flag = False
        stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig)
        start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig)

    def check_stop_flg(self):
        if self.stop_flag is False:
            return False
        else:
            return True

    def _start_trainnig(self, req):
        rospy.logdebug("_start_trainnig!!!!")
        self.stop_flag = False
        return SetBoolResponse(True, "_start_trainnig")

    def _stop_trainnig(self, req):
        rospy.logdebug("_stop_trainnig!!!!")
        self.stop_flag = True
        return SetBoolResponse(True, "_stop_trainnig")

    def _set_pos_ctrl(self, req):
        rospy.wait_for_service('set_position_controller')
        self._ctrl_conn.stop_controllers(self.pos_traj_controller)
        self._ctrl_conn.start_controllers(self.pos_controller)
        self._ctrl_type = 'pos'
        return SetBoolResponse(True, "_set_pos_ctrl")

    def _set_traj_pos_ctrl(self, req):
        rospy.wait_for_service('set_trajectory_position_controller')
        self._ctrl_conn.stop_controllers(self.pos_controller)
        self._ctrl_conn.start_controllers(self.pos_traj_controller)    
        self._ctrl_type = 'traj_pos'
        return SetBoolResponse(True, "_set_traj_pos_ctrl")  

    def _set_vel_ctrl(self, req):
        rospy.wait_for_service('set_velocity_controller')
        self._ctrl_conn.stop_controllers(self.vel_traj_controller)
        self._ctrl_conn.start_controllers(self.vel_controller)
        self._ctrl_type = 'vel'
        return SetBoolResponse(True, "_set_vel_ctrl")

    def _set_traj_vel_ctrl(self, req):
        rospy.wait_for_service('set_trajectory_velocity_controller')
        self._ctrl_conn.stop_controllers(self.vel_controller)
        self._ctrl_conn.start_controllers(self.vel_traj_controller)    
        self._ctrl_type = 'traj_vel'
        return SetBoolResponse(True, "_set_traj_vel_ctrl")  

#    def _set_grp_ctrl(self, req):
#        rospy.wait_for_service('set_gripper_controller')
#        self._ctrl_conn.start_controllers(self.gripper_controller)    
#        return SetBoolResponse(True, "_set_grp_ctrl")  

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
        
    def link_state_callback(self, msg):
        self.link_state = msg
        self.end_effector = self.link_state.pose[12]
        self.imu_link = self.link_state.pose[5]
        self.door = self.link_state.pose[2]

#    def target_point_callback(self, msg):
#        self.target_point = msg

    def check_all_systems_ready(self):
        """
        We check that all systems are ready
        :return:
        """
        joint_states_msg = None
        while joint_states_msg is None and not rospy.is_shutdown():
            try:
                joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1)
                self.joints_state = joint_states_msg
                rospy.logdebug("Current joint_states READY")
            except Exception as e:
                self._ctrl_conn.start_controllers(controllers_on="joint_state_controller")                
                rospy.logdebug("Current joint_states not ready yet, retrying==>"+str(e))
        
        link_states_msg = None
        while link_states_msg is None and not rospy.is_shutdown():
            try:
                link_states_msg = rospy.wait_for_message("/gazebo/link_states", LinkStates, timeout=0.1)
                self.link_states = link_states_msg
                rospy.logdebug("Reading link_states READY")
            except Exception as e:
                rospy.logdebug("Reading link_states not ready yet, retrying==>"+str(e))

        rospy.logdebug("ALL SYSTEMS READY")

    def get_xyz(self, q):
        """Get x,y,z coordinates 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        return xyz

    def get_current_xyz(self):
        """Get x,y,z coordinates according to currrent joint angles
        Returns:
        xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        joint_states = self.joints_state
        shp_joint_ang = joint_states.position[0]
        shl_joint_ang = joint_states.position[1]
        elb_joint_ang = joint_states.position[2]
        wr1_joint_ang = joint_states.position[3]
        wr2_joint_ang = joint_states.position[4]
        wr3_joint_ang = joint_states.position[5]
        
        q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang]
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        return xyz
            
    def get_orientation(self, q):
        """Get Euler angles 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        orientation = mat[0:3, 0:3]
        roll = -orientation[1, 2]
        pitch = orientation[0, 2]
        yaw = -orientation[0, 1]
        
        return Vector3(roll, pitch, yaw)


    def cvt_quat_to_euler(self, quat):
        euler_rpy = Vector3()
        euler = euler_from_quaternion([self.quat.x, self.quat.y, self.quat.z, self.quat.w])

        euler_rpy.x = euler[0]
        euler_rpy.y = euler[1]
        euler_rpy.z = euler[2]
        return euler_rpy

    def init_joints_pose(self, init_pos):
        """
        We initialise the Position variable that saves the desired position where we want our
        joints to be
        :param init_pos:
        :return:
        """
        self.current_joint_pose =[]
        self.current_joint_pose = copy.deepcopy(init_pos)
#	print("[current_joint_pose]:", self.current_joint_pose, type(self.current_joint_pose))
        return self.current_joint_pose

    def get_euclidean_dist(self, p_in, p_pout):
        """
        Given a Vector3 Object, get distance from current position
        :param p_end:
        :return:
        """
        a = numpy.array((p_in.x, p_in.y, p_in.z))
        b = numpy.array((p_pout.x, p_pout.y, p_pout.z))

        distance = numpy.linalg.norm(a - b)

        return distance

    def joints_state_callback(self,msg):
        self.joints_state = msg

    def wrench_stamped_callback(self,msg):
        self.wrench_stamped = msg

    def get_observations(self):
        """
        Returns the state of the robot needed for OpenAI QLearn Algorithm
        The state will be defined by an array
        :return: observation
        """
        joint_states = self.joints_state
        self.force = self.wrench_stamped.wrench.force
        self.torque = self.wrench_stamped.wrench.torque
#        print("[force]", self.force.x, self.force.y, self.force.z)
#        print("[torque]", self.torque.x, self.torque.y, self.torque.z)

        shp_joint_ang = joint_states.position[2]
        shl_joint_ang = joint_states.position[1]
        elb_joint_ang = joint_states.position[0]
        wr1_joint_ang = joint_states.position[9]
        wr2_joint_ang = joint_states.position[10]
        wr3_joint_ang = joint_states.position[11]

        shp_joint_vel = joint_states.velocity[2]
        shl_joint_vel = joint_states.velocity[1]
        elb_joint_vel = joint_states.velocity[0]
        wr1_joint_vel = joint_states.velocity[9]
        wr2_joint_vel = joint_states.velocity[10]
        wr3_joint_vel = joint_states.velocity[11]

        q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang]
        eef_x, eef_y, eef_z = self.get_xyz(q)

        observation = []
#        rospy.logdebug("List of Observations==>"+str(self.observations))
        for obs_name in self.observations:
            if obs_name == "shp_joint_ang":
                observation.append(shp_joint_ang)
            elif obs_name == "shl_joint_ang":
                observation.append(shl_joint_ang)
            elif obs_name == "elb_joint_ang":
                observation.append(elb_joint_ang)
            elif obs_name == "wr1_joint_ang":
                observation.append(wr1_joint_ang)
            elif obs_name == "wr2_joint_ang":
                observation.append(wr2_joint_ang)
            elif obs_name == "wr3_joint_ang":
                observation.append(wr3_joint_ang)
            elif obs_name == "shp_joint_vel":
                observation.append(shp_joint_vel)
            elif obs_name == "shl_joint_vel":
                observation.append(shl_joint_vel)
            elif obs_name == "elb_joint_vel":
                observation.append(elb_joint_vel)
            elif obs_name == "wr1_joint_vel":
                observation.append(wr1_joint_vel)
            elif obs_name == "wr2_joint_vel":
                observation.append(wr2_joint_vel)
            elif obs_name == "wr3_joint_vel":
                observation.append(wr3_joint_vel)
            elif obs_name == "eef_x":
                observation.append(eef_x)
            elif obs_name == "eef_y":
                observation.append(eef_y)
            elif obs_name == "eef_z":
                observation.append(eef_z)
            elif obs_name == "force_x":
                observation.append(self.force.x)
            elif obs_name == "force_y":
                observation.append(self.force.y)
            elif obs_name == "force_z":
                observation.append(self.force.z)
            elif obs_name == "torque_x":
                observation.append(self.torque.x)
            elif obs_name == "torque_y":
                observation.append(self.torque.y)
            elif obs_name == "torque_z":
                observation.append(self.torque.z)
            else:
                raise NameError('Observation Asked does not exist=='+str(obs_name))

        return observation

    def clamp_to_joint_limits(self):
        """
        clamps self.current_joint_pose based on the joint limits
        self._joint_limits
        {
         "shp_max": shp_max,
         "shp_min": shp_min,
         ...
         }
        :return:
        """

        rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose))
        shp_joint_value = self.current_joint_pose[0]
        shl_joint_value = self.current_joint_pose[1]
        elb_joint_value = self.current_joint_pose[2]
        wr1_joint_value = self.current_joint_pose[3]
        wr2_joint_value = self.current_joint_pose[4]
        wr3_joint_value = self.current_joint_pose[5]

        self.current_joint_pose[0] = max(min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"])
        self.current_joint_pose[1] = max(min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"])
        self.current_joint_pose[2] = max(min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"])
        self.current_joint_pose[3] = max(min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"])
        self.current_joint_pose[4] = max(min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"])
        self.current_joint_pose[5] = max(min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"])

        rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose))

    # Resets the state of the environment and returns an initial observation.
    def reset(self):

	# Go to initial position
	self._gz_conn.unpauseSim()
        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0))
        init_pos0 = self.init_joints_pose(self.init_joint_pose0)
        self.arr_init_pos0 = np.array(init_pos0, dtype='float32')
        init_pos1 = self.init_joints_pose(self.init_joint_pose1)
        self.arr_init_pos1 = np.array(init_pos1, dtype='float32')
        init_g_pos1 = self.init_joints_pose(self.init_grp_pose1)
        arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32')

        jointtrajpub = JointTrajPub()
        for update in range(500):
        	jointtrajpub.GrpCommand(arr_init_g_pos1)
        time.sleep(2)
        for update in range(1000):
        	jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1)
        time.sleep(0.3)
        for update in range(1000):
        	jointtrajpub.jointTrajectoryCommand(self.arr_init_pos0)
        time.sleep(0.3)

        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self._gz_conn.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self._gz_conn.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self._gz_conn.change_gravity_zero()

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_ur_joint_controllers...")
        self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1))
        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2))

        # We save that position as the current joint desired position
#	print("[init_joint_pose1]:", self.init_joint_pose1, type(self.init_joint_pose1))

        init_pos2 = self.init_joints_pose(self.init_joint_pose2)
        self.arr_init_pos2 = np.array(init_pos2, dtype='float32')
        after_rotate = self.init_joints_pose(self.after_rotate)
        self.arr_after_rotate = np.array(after_rotate, dtype='float32')
        after_pull = self.init_joints_pose(self.after_pull)
        self.arr_after_pull = np.array(after_pull, dtype='float32')

        init_g_pos2 = self.init_joints_pose(self.init_grp_pose2)
        arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32')

        # 4th: We Set the init pose to the jump topic so that the jump control can update
        # We check the jump publisher has connection

        if self._ctrl_type == 'traj_pos':
            self._joint_traj_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'pos':
            self._joint_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'traj_vel':
            self._joint_traj_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'vel':
            self._joint_pubisher.check_publishers_connection()
        else:
            rospy.logwarn("Controller type is wrong!!!!")
        
        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.check_all_systems_ready()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self._gz_conn.adjust_gravity()

        for update in range(1000):
        	jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1)
        time.sleep(0.3)
        for update in range(1000):
        	jointtrajpub.jointTrajectoryCommand(self.arr_init_pos2)
        time.sleep(0.3)
        for update in range(50):
        	jointtrajpub.GrpCommand(arr_init_g_pos2)
        time.sleep(2)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self._gz_conn.pauseSim()

        # 8th: Get the State Discrete Stringuified version of the observations
        rospy.logdebug("get_observations...")
       	observation = self.get_observations()
#        print("[observations]", observation)

        return observation

    def _act(self, action):
        if self._ctrl_type == 'traj_pos':
            self.pre_ctrl_type = 'traj_pos'
            self._joint_traj_pubisher.jointTrajectoryCommand(action)
        elif self._ctrl_type == 'pos':
            self.pre_ctrl_type = 'pos'
            self._joint_pubisher.move_joints(action)
        elif self._ctrl_type == 'traj_vel':
            self.pre_ctrl_type = 'traj_vel'
            self._joint_traj_pubisher.jointTrajectoryCommand(action)
        elif self._ctrl_type == 'vel':
            self.pre_ctrl_type = 'vel'
            self._joint_pubisher.move_joints(action)
        else:
            self._joint_pubisher.move_joints(action)
        
    def training_ok(self):
        rate = rospy.Rate(1)
        while self.check_stop_flg() is True:                  
            rospy.logdebug("stop_flag is ON!!!!")
            self._gz_conn.unpauseSim()

            if self.check_stop_flg() is False:
                break 
            rate.sleep()
                
    def step(self, action):
        '''
        ('action: ', array([ 0.,  0. , -0., -0., -0. , 0. ], dtype=float32))        
        '''
        rospy.logdebug("UR step func")	# define the logger
        self.training_ok()

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot
        # Act
        self._gz_conn.unpauseSim()

        action = action + self.arr_init_pos2
        self._act(action)
#	print("[action]", action)

        self.wrench_stamped
        self.force = self.wrench_stamped.wrench.force
        self.torque = self.wrench_stamped.wrench.torque
#	print("wrench", self.wrench_stamped, type(self.wrench_stamped)) 	#<class 'geometry_msgs.msg._WrenchStamped.WrenchStamped'>
#        print("[force]", self.force.x, self.force.y, self.force.z)
#        print("[torque]", self.torque.x, self.torque.y, self.torque.z)

        '''
        if self.force_limit < self.force.x or self.force.x < -self.force_limit:
        	self._act(self.previous_action)
#        	print("force.x over the limit")
#        	print("[previous_action]", self.previous_action)
        elif self.force_limit < self.force.y or self.force.y < -self.force_limit:
        	self._act(self.previous_action)
#        	print("force.y over the limit")
#        	print("[previous_action]", self.previous_action)
        elif self.force_limit < self.force.z or self.force.z < -self.force_limit:
        	self._act(self.previous_action)
#        	print("force.z over the limit")
#        	print("[previous_action]", self.previous_action)
        elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit:
        	self._act(self.previous_action)
#        	print("torque.x over the limit")
#        	print("[previous_action]", self.previous_action)
        elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit:
        	self._act(self.previous_action)
#        	print("torque.y over the limit")
#        	print("[previous_action]", self.previous_action)
        elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit:
        	self._act(self.previous_action)
#        	print("torque.z over the limit")
#        	print("[previous_action]", self.previous_action)
        else:
        	self.previous_action = copy.deepcopy(action)
#        	print("[action]", action)
        '''
    
        # Then we send the command to the robot and let it go for running_step seconds
        time.sleep(self.running_step)
        self._gz_conn.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.get_observations()
#        print("[observations]", observation)

        # finally we get an evaluation based on what happened in the sim
        reward = self.compute_dist_rewards()
        done = self.check_done()

        return observation, reward, done, {}

    def compute_dist_rewards(self):
        self.quat = self.door.orientation
        self.door_rpy = self.cvt_quat_to_euler(self.quat)
        self.quat = self.imu_link.orientation
        self.imu_link_rpy = self.cvt_quat_to_euler(self.quat)
        compute_rewards = 0

#        self.rpy = self.cvt_quat_to_euler(Quaternion(0.0, 0.0, 0.7071, 0.7071))
        #print ("[self.target_point]: ", [self.target_point.x, self.target_point.y, self.target_point.z])
        #print ("(self.get_current_xyz(): ", self.get_current_xyz())
        #end_effector_pos = np.array([self.end_effector.position.x, self.end_effector.position.y, self.end_effector.position.z])
        #self.distance = np.linalg.norm(end_effector_pos - [self.target_point.x, self.target_point.y, self.target_point.z], axis=0)

#        print ("[door]: ", [self.door.orientation.x, self.door.orientation.y, self.door.orientation.z, self.door.orientation.w])
#        print ("[imu_link]: ", [self.imu_link.orientation.x, self.imu_link.orientation.y, self.imu_link.orientation.z, self.imu_link.orientation.w])
#        print ("[door_rpy]: ", [self.door_rpy.x, self.door_rpy.y, self.door_rpy.z])			# [-3.141590232638843, 4.64637166410168e-06, 3.1407993185850303]
													# => [-3.141587417428544, 6.811796590263218e-05, 2.8971100347923704]
#        print ("[self.imu_link_rpy]: ", [self.imu_link_rpy.x, self.imu_link_rpy.y, self.imu_link_rpy.z])	# [-5.017238272290064e-06, 2.560885641286913e-07, 1.5707993173228965]
													# => [1.2205817198134408, -4.341035340318689e-06, 1.3270298472237638]
#        print ("[type]: ", type(self.door_rpy), type(self.end_effector.position))
#        print ("[rpy]: ", [self.rpy.x, self.rpy.y, self.rpy.z])

#	self.counter = self.counter + 1

#        return self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z  # for door opening
        compute_rewards = self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z

        return compute_rewards  # for door opening

# close
#('[door_rpy]: ', [-3.141589494723927, 5.371950050444065e-06, 3.140803800525111])
#('[self.imu_link_rpy]: ', [-5.406752832019292e-06, 6.896590419417709e-06, 1.5708061011106662])

# open
#('[door_rpy]: ', [3.1374584007550737, -0.0018131747911100536, 2.764050391990123(delta=0.3768)])
#('[self.imu_link_rpy]: ', [1.0934212049101757(delta=1.0934), -0.004085577221111396, 1.1941435185763472(delta=0.3768)])

    def check_done(self):
#        if 3.1408 - self.door_rpy.z > 3.14 / 180 * 10: # for door opening
        if self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z > 3000: # for door opening
            print("done")
            return True
        else :
        	return False
class OldMovingCubeEnv(gym.Env):
    def __init__(self):
        self.publishers_array = []
        self._roll_vel_pub = rospy.Publisher(
            '/moving_cube/inertia_wheel_roll_joint_velocity_controller/command',
            Float64,
            queue_size=1)
        self.publishers_array.append(self._roll_vel_pub)

        self.action_space = spaces.Discrete(3)  #l,r,nothing
        self._seed()

        #get configuration parameters
        self.wait_time = rospy.get_param('/moving_cube/wait_time')
        self.running_step = rospy.get_param('/moving_cube/running_step')
        self.speed_step = rospy.get_param('/moving_cube/speed_step')
        self.init_roll_vel = rospy.get_param('/moving_cube/init_roll_vel')

        self.init_internal_vars(self.init_roll_vel)

        rospy.Subscriber("/moving_cube/joint_states", JointState,
                         self.joints_callback)
        rospy.Subscriber("/moving_cube/odom", Odometry, self.odom_callback)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(
            namespace="moving_cube")

    def init_internal_vars(self, init_roll_vel_value):
        self.roll_vel = [init_roll_vel_value]
        self.joints = None

    #always returns the current state of the joints
    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data

    def get_clock_time(self):
        self.clock_time = None
        while self.clock_time is None and not rospy.is_shutdown():
            try:
                self.clock_time = rospy.wait_for_message("/clock",
                                                         Clock,
                                                         timeout=1.0)
                rospy.logdebug("Current clock_time READY=>" +
                               str(self.clock_time))
            except:
                rospy.logdebug(
                    "Current clock_time not ready yet, retrying for getting Current clock_time"
                )
        return self.clock_time

    def _seed(self, seed=None):  #overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):  #overriden function

        # Take action
        if action == 0:  #LEFT
            rospy.logwarn("GO LEFT...")
            self.roll_vel[0] -= self.speed_step
        elif action == 1:  #RIGHT
            rospy.logwarn("GO RIGHT...")
            self.roll_vel[0] += self.speed_step
        elif action == 1:  #STOP
            rospy.logwarn("STOP...")
            self.roll_vel[0] = 0.0

        rospy.logwarn("MOVING TO SPEED==" + str(self.roll_vel))

        # 1st: unpause simulation
        rospy.logdebug("Unpause SIM...")
        self.gazebo.unpauseSim()

        self.move_joints(self.roll_vel)
        rospy.logdebug("Wait for some time to execute movement, time=" +
                       str(self.running_step))
        rospy.sleep(self.running_step)  #wait for some time
        rospy.logdebug("DONE Wait for some time to execute movement, time=" +
                       str(self.running_step))

        # 3rd: pause simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # 4th: get the observation
        observation, done, state = self.observation_checks()

        # 5th: get the reward
        if not done:
            step_reward = 0
            obs_reward = self.get_reward_for_observations(state)
            rospy.loginfo("Reward Values: Time=" + str(step_reward) + ",Obs=" +
                          str(obs_reward))
            reward = step_reward + int(obs_reward)
            rospy.loginfo("TOT Reward=" + str(reward))
        else:
            reward = -2000000

        return observation, reward, done, {}

    def _reset(self):

        rospy.logdebug("We UNPause the simulation to start having topic data")
        self.gazebo.unpauseSim()

        rospy.logdebug("CLOCK BEFORE RESET")
        self.get_clock_time()

        rospy.logdebug("SETTING INITIAL POSE TO AVOID")
        self.set_init_pose()
        time.sleep(self.wait_time * 2.0)
        rospy.logdebug("AFTER INITPOSE CHECKING SENSOR DATA")
        self.check_all_systems_ready()

        rospy.logdebug("RESETING SIMULATION")
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        rospy.logdebug("CLOCK AFTER RESET")
        self.get_clock_time()

        rospy.logdebug(
            "RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
        self.controllers_object.reset_cartpole_joint_controllers()
        rospy.logdebug("AFTER RESET CHECKING SENSOR DATA")
        self.check_all_systems_ready()
        rospy.logdebug("CLOCK AFTER SENSORS WORKING AGAIN")
        self.get_clock_time()
        rospy.logdebug("END")

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # get the last observation got when paused, generated by the callbakc or the check_all_systems_ready
        # Depends on who was last
        observation, _, state = self.observation_checks()

        return observation

    '''
    UTILITY CODE FOLLOWS HERE
    '''

    def observation_checks(self):
        done = False
        # State defined by the speed of the disk and position in the world plane
        self.get_distance_from_point(pstart, p_end)
        state = [
            round(self.joints.velocity[0], 1),
            round(self.odom.pose.pose.position.x, 1),
            round(self.odom.pose.pose.position.y, 1)
        ]
        # TODO: Create Correct Observation

        if (self.min_base_position >= state[0]
                or state[0] >= self.max_base_position
            ):  #check if the base is still within the ranges of (-2, 2)
            rospy.logerr("Base Ouside Limits==>min=" +
                         str(self.min_base_position) + ",pos=" +
                         str(state[0]) + ",max=" + str(self.max_base_position))
            done = True
        if (self.min_pole_angle >= state[2] or state[2] >=
                self.max_pole_angle):  #check if pole has toppled over
            rospy.logerr("Pole Angle Ouside Limits==>min=" +
                         str(self.min_pole_angle) + ",pos=" + str(state[2]) +
                         ",max=" + str(self.max_pole_angle))
            done = True

        observations = [state[2]]

        return observations, done, state

    def get_distance_from_point(self, pstart, p_end):
        """
        Given a Vector3 Object, get distance from current position
        :param p_end:
        :return:
        """
        a = numpy.array((pstart.x, pstart.y, pstart.z))
        b = numpy.array((p_end.x, p_end.y, p_end.z))

        distance = numpy.linalg.norm(a - b)

        return distance

    def get_reward_for_observations(self, state):
        """
        Gives more points for staying upright, gets data from given observations to avoid
        having different data than other previous functions
        :return:reward
        """

        pole_angle = state[2]
        pole_vel = state[3]

        rospy.logwarn("pole_angle for reward==>" + str(pole_angle))
        delta = 0.7 - abs(pole_angle)
        reward_pole_angle = math.exp(delta * 10)

        # If we are moving to the left and the pole is falling left is Bad
        rospy.logwarn("pole_vel==>" + str(pole_vel))
        pole_vel_sign = numpy.sign(pole_vel)
        pole_angle_sign = numpy.sign(pole_angle)
        rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign))
        rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign))

        # We want inverted signs for the speeds. We multiply by -1 to make minus positive.
        # global_sign + = GOOD, global_sign - = BAD
        base_reward = 500
        if pole_vel != 0:
            global_sign = pole_angle_sign * pole_vel_sign * -1
            reward_for_efective_movement = base_reward * global_sign
        else:
            # Is a particular case. If it doesnt move then its good also
            reward_for_efective_movement = base_reward

        reward = reward_pole_angle + reward_for_efective_movement

        rospy.logwarn("reward==>" + str(reward) + "= r_pole_angle=" +
                      str(reward_pole_angle) + ",r_movement= " +
                      str(reward_for_efective_movement))
        return reward

    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._roll_vel_pub.get_num_connections() == 0
               and not rospy.is_shutdown()):
            rospy.logdebug(
                "No susbribers to _roll_vel_pub yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.logdebug("_base_pub Publisher Connected")

        rospy.logdebug("All Publishers READY")

    def check_all_systems_ready(self, init=True):
        self.disk_joints_data = None
        while self.disk_joints_data is None and not rospy.is_shutdown():
            try:
                self.disk_joints_data = rospy.wait_for_message(
                    "/moving_cube/joint_states", JointState, timeout=1.0)
                rospy.logdebug("Current moving_cube/joint_states READY=>" +
                               str(self.disk_joints_data))

            except:
                rospy.logerr(
                    "Current moving_cube/joint_states not ready yet, retrying for getting joint_states"
                )

        self.cube_odom_data = None
        while self.disk_joints_data is None and not rospy.is_shutdown():
            try:
                self.cube_odom_data = rospy.wait_for_message(
                    "/moving_cube/odom", Odometry, timeout=1.0)
                rospy.logdebug("Current /moving_cube/odom READY=>" +
                               str(self.cube_odom_data))

            except:
                rospy.logerr(
                    "Current /moving_cube/odom not ready yet, retrying for getting odom"
                )
        rospy.logdebug("ALL SYSTEMS READY")

    def move_joints(self, joints_array):
        i = 0
        for joint_value in joints_array:
            joint_value = Float64()
            joint_value.data = joints_array[0]
            rospy.logdebug("Single Disk Joints Velocity>>" + str(joint_value))
            self.publishers_array[i].publish(joint_value)
            i += 0

    def set_init_pose(self):
        """
        Sets Roll Disk to initial vel [0.0]
        :return:
        """
        self.check_publishers_connection()
        # Reset Internal pos variable
        self.init_internal_vars(self.init_roll_vel)
        self.move_joints(self.roll_vel)
예제 #7
0
if __name__ == '__main__':

    position = 0.0
    position = float(sys.argv[1])

    rospy.init_node('debug_test_node', anonymous=True, log_level=rospy.WARN)
    rospy.logwarn("[position=" + str(position) + "]")
    wait_time = 0.1
    controllers_object = ControllersConnection(namespace="cartpole_v0")

    debug_object = MoveCartClass()

    gazebo = GazeboConnection()
    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()
    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_cartpole_joint_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("START CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("SET init pose...")
    debug_object.set_init_pose()
    rospy.loginfo("WAIT FOR GOING TO INIT POSE")
    time.sleep(wait_time)
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv):
    def __init__(self):
        #        rospy.logdebug("Starting URSimDoorOpening Class object...")

        # Init GAZEBO Objects
        self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state',
                                                SetModelState)
        self.get_world_state = rospy.ServiceProxy(
            '/gazebo/get_world_properties', GetWorldProperties)

        # Subscribe joint state and target pose
        rospy.Subscriber("/ft_sensor_topic", WrenchStamped,
                         self.wrench_stamped_callback)
        rospy.Subscriber("/joint_states", JointState,
                         self.joints_state_callback)
        rospy.Subscriber("/gazebo/link_states", LinkStates,
                         self.link_state_callback)
        rospy.Subscriber("/robotiq/rightcam/image_raw_right", Image,
                         self.r_image_callback)
        rospy.Subscriber("/robotiq/leftcam/image_raw_left", Image,
                         self.l_image_callback)

        # Gets training parameters from param server
        self.running_step = rospy.get_param("/running_step")
        self.observations = rospy.get_param("/observations")

        # Joint limitation
        shp_max = rospy.get_param("/joint_limits_array/shp_max")
        shp_min = rospy.get_param("/joint_limits_array/shp_min")
        shl_max = rospy.get_param("/joint_limits_array/shl_max")
        shl_min = rospy.get_param("/joint_limits_array/shl_min")
        elb_max = rospy.get_param("/joint_limits_array/elb_max")
        elb_min = rospy.get_param("/joint_limits_array/elb_min")
        wr1_max = rospy.get_param("/joint_limits_array/wr1_max")
        wr1_min = rospy.get_param("/joint_limits_array/wr1_min")
        wr2_max = rospy.get_param("/joint_limits_array/wr2_max")
        wr2_min = rospy.get_param("/joint_limits_array/wr2_min")
        wr3_max = rospy.get_param("/joint_limits_array/wr3_max")
        wr3_min = rospy.get_param("/joint_limits_array/wr3_min")
        self.joint_limits = {
            "shp_max": shp_max,
            "shp_min": shp_min,
            "shl_max": shl_max,
            "shl_min": shl_min,
            "elb_max": elb_max,
            "elb_min": elb_min,
            "wr1_max": wr1_max,
            "wr1_min": wr1_min,
            "wr2_max": wr2_max,
            "wr2_min": wr2_min,
            "wr3_max": wr3_max,
            "wr3_min": wr3_min
        }

        shp_init_value0 = rospy.get_param("/init_joint_pose0/shp")
        shl_init_value0 = rospy.get_param("/init_joint_pose0/shl")
        elb_init_value0 = rospy.get_param("/init_joint_pose0/elb")
        wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1")
        wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2")
        wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3")
        self.init_joint_pose0 = [
            shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0,
            wr2_init_value0, wr3_init_value0
        ]

        shp_init_value1 = rospy.get_param("/init_joint_pose1/shp")
        shl_init_value1 = rospy.get_param("/init_joint_pose1/shl")
        elb_init_value1 = rospy.get_param("/init_joint_pose1/elb")
        wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1")
        wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2")
        wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3")
        self.init_joint_pose1 = [
            shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1,
            wr2_init_value1, wr3_init_value1
        ]

        shp_init_value2 = rospy.get_param("/init_joint_pose2/shp")
        shl_init_value2 = rospy.get_param("/init_joint_pose2/shl")
        elb_init_value2 = rospy.get_param("/init_joint_pose2/elb")
        wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1")
        wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2")
        wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3")
        self.init_joint_pose2 = [
            shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2,
            wr2_init_value2, wr3_init_value2
        ]

        r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive")
        l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive")
        r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower")
        l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower")
        r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring")
        l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring")

        r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive")
        l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive")
        r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower")
        l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower")
        r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring")
        l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring")

        init_pos0 = self.init_joints_pose(self.init_joint_pose0)
        self.arr_init_pos0 = np.array(init_pos0, dtype='float32')
        init_pos1 = self.init_joints_pose(self.init_joint_pose1)
        self.arr_init_pos1 = np.array(init_pos1, dtype='float32')
        init_pos2 = self.init_joints_pose(self.init_joint_pose2)
        self.arr_init_pos2 = np.array(init_pos2, dtype='float32')

        self.init_grp_pose1 = [
            r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1,
            r_spr_value1, l_spr_value1
        ]
        self.init_grp_pose2 = [
            r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2,
            r_spr_value2, l_spr_value2
        ]

        init_g_pos1 = self.init_joints_pose(self.init_grp_pose1)
        self.arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32')
        init_g_pos2 = self.init_joints_pose(self.init_grp_pose2)
        self.arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32')

        # Fill in the Done Episode Criteria list
        self.episode_done_criteria = rospy.get_param("/episode_done_criteria")

        # stablishes connection with simulator
        self._gz_conn = GazeboConnection()
        self._ctrl_conn = ControllersConnection(namespace="")

        # Controller type for ros_control
        self._ctrl_type = rospy.get_param("/control_type")
        self.pre_ctrl_type = self._ctrl_type

        # Get the force and troque limit
        self.force_limit = rospy.get_param("/force_limit")
        self.torque_limit = rospy.get_param("/torque_limit")

        # Get tolerances of door_frame
        self.tolerances = rospy.get_param("/door_frame_tolerances")

        # Get observation parameters
        self.joint_n = rospy.get_param("/obs_params/joint_n")
        self.eef_n = rospy.get_param("/obs_params/eef_n")
        self.eef_rpy_n = rospy.get_param("/obs_params/eef_rpy_n")
        self.force_n = rospy.get_param("/obs_params/force_n")
        self.torque_n = rospy.get_param("/obs_params/torque_n")
        self.image_n = rospy.get_param("/obs_params/image_n")
        self.min_static_limit = rospy.get_param("/min_static_limit")
        self.max_static_limit = rospy.get_param("/max_static_limit")

        # We init the observations
        self.base_orientation = Quaternion()
        self.imu_link = Quaternion()
        self.door = Quaternion()
        self.door_frame = Point()
        self.quat = Quaternion()
        self.imu_link_rpy = Vector3()
        self.door_rpy = Vector3()
        self.link_state = LinkStates()
        self.wrench_stamped = WrenchStamped()
        self.joints_state = JointState()
        self.right_image = Image()
        self.right_image_ini = []
        self.left_image = Image()
        self.lift_image_ini = []
        self.end_effector = Point()
        self.previous_action = copy.deepcopy(self.arr_init_pos2)
        self.counter = 0
        self.max_rewards = 1

        # Arm/Control parameters
        self._ik_params = setups['UR5_6dof']['ik_params']

        # ROS msg type
        self._joint_pubisher = JointPub()
        self._joint_traj_pubisher = JointTrajPub()

        # Gym interface and action
        self.action_space = spaces.Discrete(n_act)
        self.observation_space = obs_dim  #np.arange(self.get_observations().shape[0])
        self.reward_range = (-np.inf, np.inf)
        self._seed()

        # Change the controller type
        set_joint_pos_server = rospy.Service('/set_position_controller',
                                             SetBool, self._set_pos_ctrl)
        set_joint_traj_pos_server = rospy.Service(
            '/set_trajectory_position_controller', SetBool,
            self._set_traj_pos_ctrl)
        set_joint_vel_server = rospy.Service('/set_velocity_controller',
                                             SetBool, self._set_vel_ctrl)
        set_joint_traj_vel_server = rospy.Service(
            '/set_trajectory_velocity_controller', SetBool,
            self._set_traj_vel_ctrl)

        self.pos_traj_controller = [
            'joint_state_controller', 'gripper_controller',
            'pos_traj_controller'
        ]
        self.pos_controller = [
            "joint_state_controller", "gripper_controller",
            "ur_shoulder_pan_pos_controller",
            "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller",
            "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller",
            "ur_wrist_3_pos_controller"
        ]
        self.vel_traj_controller = [
            'joint_state_controller', 'gripper_controller',
            'vel_traj_controller'
        ]
        self.vel_controller = [
            "joint_state_controller", "gripper_controller",
            "ur_shoulder_pan_vel_controller",
            "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller",
            "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller",
            "ur_wrist_3_vel_controller"
        ]

        # Helpful False
        self.stop_flag = False
        stop_trainning_server = rospy.Service('/stop_training', SetBool,
                                              self._stop_trainnig)
        start_trainning_server = rospy.Service('/start_training', SetBool,
                                               self._start_trainnig)

    def check_stop_flg(self):
        if self.stop_flag is False:
            return False
        else:
            return True

    def _start_trainnig(self, req):
        rospy.logdebug("_start_trainnig!!!!")
        self.stop_flag = False
        return SetBoolResponse(True, "_start_trainnig")

    def _stop_trainnig(self, req):
        rospy.logdebug("_stop_trainnig!!!!")
        self.stop_flag = True
        return SetBoolResponse(True, "_stop_trainnig")

    def _set_pos_ctrl(self, req):
        rospy.wait_for_service('set_position_controller')
        self._ctrl_conn.stop_controllers(self.pos_traj_controller)
        self._ctrl_conn.start_controllers(self.pos_controller)
        self._ctrl_type = 'pos'
        return SetBoolResponse(True, "_set_pos_ctrl")

    def _set_traj_pos_ctrl(self, req):
        rospy.wait_for_service('set_trajectory_position_controller')
        self._ctrl_conn.stop_controllers(self.pos_controller)
        self._ctrl_conn.start_controllers(self.pos_traj_controller)
        self._ctrl_type = 'traj_pos'
        return SetBoolResponse(True, "_set_traj_pos_ctrl")

    def _set_vel_ctrl(self, req):
        rospy.wait_for_service('set_velocity_controller')
        self._ctrl_conn.stop_controllers(self.vel_traj_controller)
        self._ctrl_conn.start_controllers(self.vel_controller)
        self._ctrl_type = 'vel'
        return SetBoolResponse(True, "_set_vel_ctrl")

    def _set_traj_vel_ctrl(self, req):
        rospy.wait_for_service('set_trajectory_velocity_controller')
        self._ctrl_conn.stop_controllers(self.vel_controller)
        self._ctrl_conn.start_controllers(self.vel_traj_controller)
        self._ctrl_type = 'traj_vel'
        return SetBoolResponse(True, "_set_traj_vel_ctrl")

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def check_all_systems_ready(self):
        """
        We check that all systems are ready
        :return:
        """
        joint_states_msg = None
        while joint_states_msg is None and not rospy.is_shutdown():
            try:
                joint_states_msg = rospy.wait_for_message("/joint_states",
                                                          JointState,
                                                          timeout=0.1)
                self.joints_state = joint_states_msg
                rospy.logdebug("Current joint_states READY")
            except Exception as e:
                self._ctrl_conn.start_controllers(
                    controllers_on="joint_state_controller")
                rospy.logdebug(
                    "Current joint_states not ready yet, retrying==>" + str(e))

        link_states_msg = None
        while link_states_msg is None and not rospy.is_shutdown():
            try:
                link_states_msg = rospy.wait_for_message("/gazebo/link_states",
                                                         LinkStates,
                                                         timeout=0.1)
                self.link_states = link_states_msg
                rospy.logdebug("Reading link_states READY")
            except Exception as e:
                rospy.logdebug(
                    "Reading link_states not ready yet, retrying==>" + str(e))

        rospy.logdebug("ALL SYSTEMS READY")

    def get_xyz(self, q):
        """Get x,y,z coordinates 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        return xyz

    def get_current_xyz(self):
        """Get x,y,z coordinates according to currrent joint angles
        Returns:
        xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        joint_states = self.joints_state
        shp_joint_ang = joint_states.position[0]
        shl_joint_ang = joint_states.position[1]
        elb_joint_ang = joint_states.position[2]
        wr1_joint_ang = joint_states.position[3]
        wr2_joint_ang = joint_states.position[4]
        wr3_joint_ang = joint_states.position[5]

        q = [
            shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang,
            wr2_joint_ang, wr3_joint_ang
        ]
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        return xyz

    def get_orientation(self, q):
        """Get Euler angles 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        orientation = mat[0:3, 0:3]
        roll = -orientation[1, 2]
        pitch = orientation[0, 2]
        yaw = -orientation[0, 1]

        return Vector3(roll, pitch, yaw)

    def cvt_quat_to_euler(self, quat):
        euler_rpy = Vector3()
        euler = euler_from_quaternion(
            [self.quat.x, self.quat.y, self.quat.z, self.quat.w])

        euler_rpy.x = euler[0]
        euler_rpy.y = euler[1]
        euler_rpy.z = euler[2]
        return euler_rpy

    def init_joints_pose(self, init_pos):
        """
        We initialise the Position variable that saves the desired position where we want our
        joints to be
        :param init_pos:
        :return:
        """
        self.current_joint_pose = []
        self.current_joint_pose = copy.deepcopy(init_pos)
        return self.current_joint_pose

    def get_euclidean_dist(self, p_in, p_pout):
        """
        Given a Vector3 Object, get distance from current position
        :param p_end:
        :return:
        """
        a = numpy.array((p_in.x, p_in.y, p_in.z))
        b = numpy.array((p_pout.x, p_pout.y, p_pout.z))

        distance = numpy.linalg.norm(a - b)

        return distance

    def joints_state_callback(self, msg):
        self.joints_state = msg

    def wrench_stamped_callback(self, msg):
        self.wrench_stamped = msg

    def link_state_callback(self, msg):
        self.link_state = msg
        self.end_effector = self.link_state.pose[12]
        self.imu_link = self.link_state.pose[5]
        self.door_frame = self.link_state.pose[1]
        self.door = self.link_state.pose[2]

    def r_image_callback(self, msg):
        self.right_image = msg

    def l_image_callback(self, msg):
        self.left_image = msg

    def get_observations(self):
        """
        Returns the state of the robot needed for OpenAI QLearn Algorithm
        The state will be defined by an array
        :return: observation
        """
        joint_states = self.joints_state
        eef_rpy = Vector3()

        self.force = self.wrench_stamped.wrench.force
        self.torque = self.wrench_stamped.wrench.torque
        #        print("[force]", self.force.x, self.force.y, self.force.z)
        #        print("[torque]", self.torque.x, self.torque.y, self.torque.z)

        shp_joint_ang = joint_states.position[2]
        shl_joint_ang = joint_states.position[1]
        elb_joint_ang = joint_states.position[0]
        wr1_joint_ang = joint_states.position[9]
        wr2_joint_ang = joint_states.position[10]
        wr3_joint_ang = joint_states.position[11]

        shp_joint_vel = joint_states.velocity[2]
        shl_joint_vel = joint_states.velocity[1]
        elb_joint_vel = joint_states.velocity[0]
        wr1_joint_vel = joint_states.velocity[9]
        wr2_joint_vel = joint_states.velocity[10]
        wr3_joint_vel = joint_states.velocity[11]

        q = [
            shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang,
            wr2_joint_ang, wr3_joint_ang
        ]
        eef_x, eef_y, eef_z = self.get_xyz(q)
        eef_x_ini, eef_y_ini, eef_z_ini = self.get_xyz(self.init_joint_pose2)

        eef_rpy = self.get_orientation(q)
        eef_rpy_ini = self.get_orientation(self.init_joint_pose2)
        r_image = self.right_image
        l_image = self.left_image

        observation = []
        #        rospy.logdebug("List of Observations==>"+str(self.observations))
        for obs_name in self.observations:
            if obs_name == "shp_joint_ang":
                observation.append(
                    (shp_joint_ang - self.init_joint_pose2[0]) * self.joint_n)
            elif obs_name == "shl_joint_ang":
                observation.append(
                    (shl_joint_ang - self.init_joint_pose2[1]) * self.joint_n)
            elif obs_name == "elb_joint_ang":
                observation.append(
                    (elb_joint_ang - self.init_joint_pose2[2]) * self.joint_n)
            elif obs_name == "wr1_joint_ang":
                observation.append(
                    (wr1_joint_ang - self.init_joint_pose2[3]) * self.joint_n)
            elif obs_name == "wr2_joint_ang":
                observation.append(
                    (wr2_joint_ang - self.init_joint_pose2[4]) * self.joint_n)
            elif obs_name == "wr3_joint_ang":
                observation.append(
                    (wr3_joint_ang - self.init_joint_pose2[5]) * self.joint_n)
            elif obs_name == "shp_joint_vel":
                observation.append(shp_joint_vel)
            elif obs_name == "shl_joint_vel":
                observation.append(shl_joint_vel)
            elif obs_name == "elb_joint_vel":
                observation.append(elb_joint_vel)
            elif obs_name == "wr1_joint_vel":
                observation.append(wr1_joint_vel)
            elif obs_name == "wr2_joint_vel":
                observation.append(wr2_joint_vel)
            elif obs_name == "wr3_joint_vel":
                observation.append(wr3_joint_vel)
            elif obs_name == "eef_x":
                observation.append((eef_x - eef_x_ini) * self.eef_n)
            elif obs_name == "eef_y":
                observation.append((eef_y - eef_y_ini) * self.eef_n)
            elif obs_name == "eef_z":
                observation.append((eef_z - eef_z_ini) * self.eef_n)
            elif obs_name == "eef_rpy_x":
                observation.append(
                    (eef_rpy.x - eef_rpy_ini.x) * self.eef_rpy_n)
            elif obs_name == "eef_rpy_y":
                observation.append(
                    (eef_rpy.y - eef_rpy_ini.y) * self.eef_rpy_n)
            elif obs_name == "eef_rpy_z":
                observation.append(
                    (eef_rpy.z - eef_rpy_ini.z) * self.eef_rpy_n)
            elif obs_name == "force_x":
                observation.append((self.force.x - self.force_ini.x) /
                                   self.force_limit * self.force_n)
            elif obs_name == "force_y":
                observation.append((self.force.y - self.force_ini.y) /
                                   self.force_limit * self.force_n)
            elif obs_name == "force_z":
                observation.append((self.force.z - self.force_ini.z) /
                                   self.force_limit * self.force_n)
            elif obs_name == "torque_x":
                observation.append((self.torque.x - self.torque_ini.x) /
                                   self.torque_limit * self.torque_n)
            elif obs_name == "torque_y":
                observation.append((self.torque.y - self.torque_ini.y) /
                                   self.torque_limit * self.torque_n)
            elif obs_name == "torque_z":
                observation.append((self.torque.z - self.torque_ini.z) /
                                   self.torque_limit * self.torque_n)
            elif obs_name == "image_data":
                for x in range(0, 28):
                    observation.append(
                        (ord(r_image.data[x]) -
                         ord(self.right_image_ini.data[x])) * self.image_n)
                for x in range(0, 28):
                    observation.append(
                        (ord(l_image.data[x]) -
                         ord(self.left_image_ini.data[x])) * self.image_n)
            else:
                raise NameError('Observation Asked does not exist==' +
                                str(obs_name))
#        print("observation", list(map(round, observation, [3]*len(observation))))

        return observation

    def clamp_to_joint_limits(self):
        """
        clamps self.current_joint_pose based on the joint limits
        self._joint_limits
        {
         "shp_max": shp_max,
         "shp_min": shp_min,
         ...
         }
        :return:
        """

        rospy.logdebug("Clamping current_joint_pose>>>" +
                       str(self.current_joint_pose))
        shp_joint_value = self.current_joint_pose[0]
        shl_joint_value = self.current_joint_pose[1]
        elb_joint_value = self.current_joint_pose[2]
        wr1_joint_value = self.current_joint_pose[3]
        wr2_joint_value = self.current_joint_pose[4]
        wr3_joint_value = self.current_joint_pose[5]

        self.current_joint_pose[0] = max(
            min(shp_joint_value, self._joint_limits["shp_max"]),
            self._joint_limits["shp_min"])
        self.current_joint_pose[1] = max(
            min(shl_joint_value, self._joint_limits["shl_max"]),
            self._joint_limits["shl_min"])
        self.current_joint_pose[2] = max(
            min(elb_joint_value, self._joint_limits["elb_max"]),
            self._joint_limits["elb_min"])
        self.current_joint_pose[3] = max(
            min(wr1_joint_value, self._joint_limits["wr1_max"]),
            self._joint_limits["wr1_min"])
        self.current_joint_pose[4] = max(
            min(wr2_joint_value, self._joint_limits["wr2_max"]),
            self._joint_limits["wr2_min"])
        self.current_joint_pose[5] = max(
            min(wr3_joint_value, self._joint_limits["wr3_max"]),
            self._joint_limits["wr3_min"])

        rospy.logdebug("DONE Clamping current_joint_pose>>>" +
                       str(self.current_joint_pose))

    def first_reset(self):
        #        print("first reset")
        jointtrajpub = JointTrajPub()
        for update in range(500):
            jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos0)
        time.sleep(0.5)
        for update in range(300):
            jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1)
        time.sleep(0.5)

    # Resets the state of the environment and returns an initial observation.
    def reset(self):
        self.max_knob_rotation = 0
        self.max_door_rotation = 0
        self.max_wirst3 = 0
        self.min_wirst3 = 0
        self.max_wirst2 = 0
        self.min_wirst2 = 0
        self.max_wirst1 = 0
        self.min_wirst1 = 0
        self.max_elb = 0
        self.min_elb = 0
        self.max_shl = 0
        self.min_shl = 0
        self.max_shp = 0
        self.min_shp = 0
        self.max_force_x = 0
        self.min_force_x = 0
        self.max_force_y = 0
        self.min_force_y = 0
        self.max_force_z = 0
        self.min_force_z = 0
        self.max_torque_x = 0
        self.min_torque_x = 0
        self.max_torque_y = 0
        self.min_torque_y = 0
        self.max_torque_z = 0
        self.min_torque_z = 0
        # Go to initial position
        self._gz_conn.unpauseSim()
        #        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0))
        jointtrajpub = JointTrajPub()
        for update in range(200):
            jointtrajpub.GrpCommand(self.arr_init_g_pos1)
#        time.sleep(1)
        for update in range(300):
            jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2)
        time.sleep(1)
        for update in range(300):
            jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1)
        time.sleep(1)

        # 0st: We pause the Simulator
        #        rospy.logdebug("Pausing SIM...")
        self._gz_conn.pauseSim()

        # 1st: resets the simulation to initial values
        #        rospy.logdebug("Reset SIM...")
        self._gz_conn.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        #        rospy.logdebug("Remove Gravity...")
        self._gz_conn.change_gravity_zero()

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        #        rospy.logdebug("reset_ur_joint_controllers...")
        self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type)

        # 3rd: resets the robot to initial conditions
        #        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1))
        #        rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2))

        self.force = self.wrench_stamped.wrench.force
        self.torque = self.wrench_stamped.wrench.torque
        #        print("self.force", self.force)
        #        print("self.torque", self.torque)

        self.force_ini = copy.deepcopy(self.force)
        self.torque_ini = copy.deepcopy(self.torque)

        # We save that position as the current joint desired position

        # 4th: We Set the init pose to the jump topic so that the jump control can update
        # We check the jump publisher has connection

        if self._ctrl_type == 'traj_pos':
            self._joint_traj_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'pos':
            self._joint_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'traj_vel':
            self._joint_traj_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'vel':
            self._joint_pubisher.check_publishers_connection()
        else:
            rospy.logwarn("Controller type is wrong!!!!")

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
#        rospy.logdebug("check_all_systems_ready...")
        self.check_all_systems_ready()

        # 6th: We restore the gravity to original
        #        rospy.logdebug("Restore Gravity...")
        self._gz_conn.adjust_gravity()

        for update in range(300):
            jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2)
        time.sleep(1)
        for update in range(200):
            jointtrajpub.GrpCommand(self.arr_init_g_pos2)
        time.sleep(1)

        # 7th: pauses simulation
        #        rospy.logdebug("Pause SIM...")
        self._gz_conn.pauseSim()

        self.right_image_ini = copy.deepcopy(self.right_image)
        self.left_image_ini = copy.deepcopy(self.left_image)

        # 8th: Get the State Discrete Stringuified version of the observations
        #        rospy.logdebug("get_observations...")
        observation = self.get_observations()
        #        print("[observations]", observation)

        return observation

    def _act(self, action):
        if self._ctrl_type == 'traj_pos':
            self.pre_ctrl_type = 'traj_pos'
            self._joint_traj_pubisher.jointTrajectoryCommand(action)
        elif self._ctrl_type == 'pos':
            self.pre_ctrl_type = 'pos'
            self._joint_pubisher.move_joints(action)
        elif self._ctrl_type == 'traj_vel':
            self.pre_ctrl_type = 'traj_vel'
            self._joint_traj_pubisher.jointTrajectoryCommand(action)
        elif self._ctrl_type == 'vel':
            self.pre_ctrl_type = 'vel'
            self._joint_pubisher.move_joints(action)
        else:
            self._joint_pubisher.move_joints(action)

    def training_ok(self):
        rate = rospy.Rate(1)
        while self.check_stop_flg() is True:
            rospy.logdebug("stop_flag is ON!!!!")
            self._gz_conn.unpauseSim()

            if self.check_stop_flg() is False:
                break
            rate.sleep()

    def step(self, action, update):
        '''
        ('action: ', array([ 0.,  0. , -0., -0., -0. , 0. ], dtype=float32))        
        '''
        #        rospy.logdebug("UR step func")	# define the logger
        self.training_ok()

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot
        # Act
        self._gz_conn.unpauseSim()

        if self.max_wirst3 < action[5]:
            self.max_wirst3 = action[5]
        if self.min_wirst3 > action[5]:
            self.min_wirst3 = action[5]
        if self.max_wirst2 < action[4]:
            self.max_wirst2 = action[4]
        if self.min_wirst2 > action[4]:
            self.min_wirst2 = action[4]
        if self.max_wirst1 < action[3]:
            self.max_wirst1 = action[3]
        if self.min_wirst1 > action[3]:
            self.min_wirst1 = action[3]
        if self.max_elb < action[2]:
            self.max_elb = action[2]
        if self.min_elb > action[2]:
            self.min_elb = action[2]
        if self.max_shl < action[1]:
            self.max_shl = action[1]
        if self.min_shl > action[1]:
            self.min_shl = action[1]
        if self.max_shp < action[0]:
            self.max_shp = action[0]
        if self.min_shp > action[0]:
            self.min_shp = action[0]

#        print("action", action)

        action = action + self.arr_init_pos2
        #        action = [1.488122534496775, -1.4496597816566892, 2.4377209990850974, 2.168370898415174, -1.4670589583209175, 1.4]
        self._act(action)

        self.wrench_stamped
        self.force = self.wrench_stamped.wrench.force
        self.torque = self.wrench_stamped.wrench.torque
        #        print("force", self.force)
        #        print("torque", self.torque)

        if self.max_force_x < self.force.x:
            self.max_force_x = self.force.x
        if self.min_force_x > self.force.x:
            self.min_force_x = self.force.x
        if self.max_force_y < self.force.y:
            self.max_force_y = self.force.y
        if self.min_force_y > self.force.y:
            self.min_force_y = self.force.y
        if self.max_force_z < self.force.z:
            self.max_force_z = self.force.z
        if self.min_force_z > self.force.z:
            self.min_force_z = self.force.z
        if self.max_torque_x < self.torque.x:
            self.max_torque_x = self.torque.x
        if self.min_torque_x > self.torque.x:
            self.min_torque_x = self.torque.x
        if self.max_torque_y < self.torque.y:
            self.max_torque_y = self.torque.y
        if self.min_torque_y > self.torque.y:
            self.min_torque_y = self.torque.y
        if self.max_torque_z < self.torque.z:
            self.max_torque_z = self.torque.z
        if self.min_torque_z > self.torque.z:
            self.min_torque_z = self.torque.z

        if self.force_limit < self.force.x or self.force.x < -self.force_limit:
            self._act(self.previous_action)
#        	print("force.x over the limit")
        elif self.force_limit < self.force.y or self.force.y < -self.force_limit:
            self._act(self.previous_action)
#        	print("force.y over the limit")
        elif self.force_limit < self.force.z or self.force.z < -self.force_limit:
            self._act(self.previous_action)
#        	print("force.z over the limit")
        elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit:
            self._act(self.previous_action)
#        	print("torque.x over the limit")
        elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit:
            self._act(self.previous_action)
#        	print("torque.y over the limit")
        elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit:
            self._act(self.previous_action)
#        	print("torque.z over the limit")
        else:
            self.previous_action = copy.deepcopy(action)
#        	print("True")

        self.min_static_taxel0 = 0
        self.min_static_taxel1 = 0
        self.max_static_taxel0 = 0
        self.max_static_taxel1 = 0
        r_image = self.right_image
        l_image = self.left_image

        for x in range(0, 28):
            if self.min_static_taxel0 > (ord(r_image.data[x]) - ord(
                    self.right_image_ini.data[x])) * self.image_n:
                self.min_static_taxel0 = (ord(r_image.data[x]) - ord(
                    self.right_image_ini.data[x])) * self.image_n
            if self.min_static_taxel1 > (ord(l_image.data[x]) - ord(
                    self.left_image_ini.data[x])) * self.image_n:
                self.min_static_taxel1 = (ord(l_image.data[x]) - ord(
                    self.left_image_ini.data[x])) * self.image_n
            if self.max_static_taxel0 < (ord(r_image.data[x]) - ord(
                    self.right_image_ini.data[x])) * self.image_n:
                self.max_static_taxel0 = (ord(r_image.data[x]) - ord(
                    self.right_image_ini.data[x])) * self.image_n
            if self.max_static_taxel1 < (ord(l_image.data[x]) - ord(
                    self.left_image_ini.data[x])) * self.image_n:
                self.max_static_taxel1 = (ord(l_image.data[x]) - ord(
                    self.left_image_ini.data[x])) * self.image_n
#        print("min, max taxel", self.min_static_taxel0, self.max_static_taxel0, self.min_static_taxel1, self.max_static_taxel1)

# Then we send the command to the robot and let it go for running_step seconds
        time.sleep(self.running_step)
        self._gz_conn.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward = self.compute_dist_rewards(action, update)
        done = self.check_done(update)

        return observation, reward, done, {}

    def compute_dist_rewards(self, action, update):
        self.quat = self.door.orientation
        self.door_rpy = self.cvt_quat_to_euler(self.quat)
        self.quat = self.imu_link.orientation
        self.imu_link_rpy = self.cvt_quat_to_euler(self.quat)
        compute_rewards = 0

        knob_c = 100  #1 rotation of knob(+)
        knob_bonus_c = 10  #2 bonus of knob rotation(+)
        panel_c = 50  #3 door panel open(+)
        panel_b_c = 50  #4 door panel before open(-)
        tolerances_c = 50  #5 movement of door frame(-)
        force_c = 1  #6 over force limit1(-)
        taxel_c = 100  #7 release the knob(-)
        act_0_n = 10  #8 action[0] (-)
        act_1_n = 10  #  action[1] (-)
        act_2_n = 10  #  action[2] (-)
        act_3_n = 10  #  action[3] (-)
        act_4_n = 10  #  action[4] (-)
        act_5_n = 10  #  action[5] (-)

        #1 rotation of knob(+), #2 bonus of knob rotation(+), #3 door panel open(+),
        if self.imu_link_rpy.x < 0.8:
            compute_rewards = self.imu_link_rpy.x * knob_c
            print("reward_knob_rotation", compute_rewards)
            if 0.4 > self.imu_link_rpy.x > 0.2:
                compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c
            elif 0.6 > self.imu_link_rpy.x > 0.4:
                compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 2
            elif 0.8 > self.imu_link_rpy.x > 0.6:
                compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 3
        else:
            compute_rewards = 0.8 * knob_c + knob_bonus_c * 3 + (
                1.5708061 - self.imu_link_rpy.z) * panel_c

        #5 movement of door frame(-)
        if abs(self.door_frame.position.x + 0.0659) > self.tolerances or abs(
                self.door_frame.position.y -
                0.5649) > self.tolerances or abs(self.door_frame.position.z -
                                                 0.0999) > self.tolerances:
            compute_rewards -= tolerances_c * (n_step -
                                               update) / n_step + tolerances_c
            print("door_frame limit", compute_rewards)

        #6 over force limit1(-)
        if self.force_limit < self.force.x or self.force.x < -self.force_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("force_x limit", compute_rewards)
        if self.force_limit < self.force.y or self.force.y < -self.force_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("force_y limit", compute_rewards)
        if self.force_limit < self.force.z or self.force.z < -self.force_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("force_z limit", compute_rewards)
        if self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("torque_x limit", compute_rewards)
        if self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("torque_y limit", compute_rewards)
        if self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit:
            compute_rewards -= force_c * (n_step - update) / n_step + force_c
            print("torque_z limit", compute_rewards)

        #7 release the knob(-)
        if self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit:
            compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c
            print("min_static limit", compute_rewards)
        if self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit:
            compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c
            print("max_static limit", compute_rewards)

        #8 joint(+, -)
        action = action - self.arr_init_pos2
        if action[5] < -0.005:
            compute_rewards -= (-0.005 - action[5]) * act_5_n
            print("action5 limit", compute_rewards)
        elif 1 < action[5]:
            compute_rewards -= (action[5] - 1) * act_5_n
            print("action5 limit", compute_rewards)
        if action[4] < -0.005:
            compute_rewards -= (-0.005 - action[4]) * act_4_n
            print("action4 limit", compute_rewards)
        elif 0.03 < action[4]:
            compute_rewards -= (action[4] - 0.03) * act_4_n
            print("action4 limit", compute_rewards)
        if action[3] < -0.023:
            compute_rewards -= (-0.023 - action[3]) * act_3_n
            print("action3 limit", compute_rewards)
        elif 0.005 < action[3]:
            compute_rewards -= (action[3] - 0.005) * act_3_n
            print("action3 limit", compute_rewards)
        if action[2] < -0.005:
            compute_rewards -= (-0.005 - action[2]) * act_2_n
            print("action2 limit", compute_rewards)
        elif 0.14 < action[2]:
            compute_rewards -= (action[2] - 0.14) * act_2_n
            print("action2 limit", compute_rewards)
        if action[1] < -0.11:
            compute_rewards -= (-0.11 - action[1]) * act_1_n
            print("action1 limit", compute_rewards)
        elif 0.005 < action[1]:
            compute_rewards -= (action[1] - 0.005) * act_1_n
            print("action1 limit", compute_rewards)
        if action[0] < -0.015:
            compute_rewards -= (-0.015 - action[0]) * act_0_n
            print("action0 limit", compute_rewards)
        if 0.005 < action[0]:
            compute_rewards -= (action[0] - 0.005) * act_0_n
            print("action0 limit", compute_rewards)

        if self.max_knob_rotation < self.imu_link_rpy.x:
            self.max_knob_rotation = self.imu_link_rpy.x
        if self.max_door_rotation < 1.5708061 - self.imu_link_rpy.z:
            self.max_door_rotation = 1.5708061 - self.imu_link_rpy.z
#        print("imu_link_rpy", self.imu_link_rpy)
#        print("door_frame", self.door_frame.position.x + 0.0659, self.door_frame.position.y - 0.5649, self.door_frame.position.z - 0.0999)

        return compute_rewards

    def check_done(self, update):
        if update > 1:
            if abs(self.door_frame.position.x + 0.0659
                   ) > self.tolerances or abs(self.door_frame.position.y -
                                              0.5649) > self.tolerances or abs(
                                                  self.door_frame.position.z -
                                                  0.0999) > self.tolerances:
                print(
                    "########## door frame position over the limit ##########",
                    update)
                return True
            elif self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit:
                print("########## static_taxles over the min_limit ##########",
                      update)
                return True
            elif self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit:
                print("########## static_taxles over the max_limit ##########",
                      update)
                return True
            else:
                return False
        else:
            return False
예제 #9
0
class CartPole3DEnv(gym.Env):

    def __init__(self):
        self.publishers_array = []
        self._base_pub = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command', Float64, queue_size=1)
        # self._pole_pub = rospy.Publisher('/cartpole_v0/pole_joint_velocity_controller/command', Float64, queue_size=1)
        self.publishers_array.append(self._base_pub)
        # self.publishers_array.append(self._pole_pub)

        self.action_space = spaces.Discrete(2) #l,r,L,R,nothing
        self._seed()

        #get configuration parameters
        self.min_pole_angle = rospy.get_param('/cart_pole_3d/min_angle')
        self.max_pole_angle = rospy.get_param('/cart_pole_3d/max_angle')
        self.max_base_velocity = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value')
        self.min_base_position = -rospy.get_param('/cart_pole_3d/max_distance')
        self.max_base_position = rospy.get_param('/cart_pole_3d/max_distance')
        self.pos_step = rospy.get_param('/cart_pole_3d/pos_step')
        self.running_step = rospy.get_param('/cart_pole_3d/running_step')
        self.init_pos = rospy.get_param('/cart_pole_3d/init_cart_vel')
        self.wait_time = rospy.get_param('/cart_pole_3d/wait_time')

        self.init_internal_vars(self.init_pos)

        rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback)

        # stablishes connection with simulator
        self.controllers_list = ['cart_joint_velocity_controller']


        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(namespace="cart_pole_3d",
                                    controllers_list=self.controllers_list)

    def init_internal_vars(self, init_pos_value):
        self.pos = [init_pos_value]
        self.joints = None

    #always returns the current state of the joints
    def joints_callback(self, data):
        self.joints = data

    def get_clock_time(self):
        self.clock_time = None
        while self.clock_time is None and not rospy.is_shutdown():
            try:
                self.clock_time = rospy.wait_for_message("/clock", Clock, timeout=1.0)
                rospy.logdebug("Current clock_time READY=>" + str(self.clock_time))
            except:
                rospy.logdebug("Current clock_time not ready yet, retrying for getting Current clock_time")
        return self.clock_time

    def _seed(self, seed=None): #overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):#overriden function

        # Take action
        if action == 0: #LEFT
            rospy.logwarn("GO LEFT...")
            self.pos[0] -= self.pos_step
        elif action == 1: #RIGHT
            rospy.logwarn("GO RIGHT...")
            self.pos[0] += self.pos_step

        rospy.logwarn("MOVING TO POS=="+str(self.pos))

        # 1st: unpause simulation
        rospy.logdebug("Unpause SIM...")
        self.gazebo.unpauseSim()

        self.move_joints(self.pos)
        rospy.logdebug("Wait for some time to execute movement, time="+str(self.running_step))
        rospy.sleep(self.running_step) #wait for some time
        rospy.logdebug("DONE Wait for some time to execute movement, time=" + str(self.running_step))

        # 3rd: pause simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # 4th: get the observation
        observation, done, state = self.observation_checks()

        # 5th: get the reward
        if not done:
            step_reward = 0
            obs_reward = self.get_reward_for_observations(state)
            rospy.loginfo("Reward Values: Time="+str(step_reward)+",Obs="+str(obs_reward))
            reward = step_reward + int(obs_reward)
            rospy.loginfo("TOT Reward=" + str(reward))
        else:
            reward = -2000000

        now = rospy.get_rostime()

        # TODO: Seems that gym version 0.8 doesnt support this very well.
        return observation, reward, done, {}

    def _reset(self):
        rospy.loginfo("We UNPause the simulation to start having topic data")
        self.gazebo.unpauseSim()

        rospy.loginfo("CLOCK BEFORE RESET")
        self.get_clock_time()

        rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA")
        # self.check_all_systems_ready()
        self.check_joint_states_ready()
        rospy.loginfo("SETTING INITIAL POSE TO AVOID")
        self.set_init_pose()
        time.sleep(self.wait_time * 2.0)

        #rospy.logdebug("We deactivate gravity to check any reasidual effect of reseting the simulation")
        #self.gazebo.change_gravity(0.0, 0.0, 0.0)

        rospy.loginfo("RESETING SIMULATION")
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        rospy.loginfo("CLOCK AFTER RESET")
        self.get_clock_time()

        rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
        self.controllers_object.reset_controllers()
        rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
        # self.check_all_systems_ready()
        self.check_joint_states_ready()
        rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
        self.get_clock_time()
        #rospy.logdebug("We reactivating gravity...")
        #self.gazebo.change_gravity(0.0, 0.0, -9.81)
        rospy.loginfo("END")

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # get the last observation got when paused, generated by the callbakc or the check_all_systems_ready
        # Depends on who was last
        observation, _, state = self.observation_checks()

        return observation


    '''
    UTILITY CODE FOLLOWS HERE
    '''

    def observation_checks(self):
        done = False
        data = self.joints
        #       base_postion            base_velocity           pole angle              pole velocity
        state = [round(data.position[1],1), round(data.velocity[1],1), round(data.position[0],3), round(data.velocity[0],3)]
        #   pole angle  pole velocity
        rospy.loginfo("BASEPOSITION=="+str(state[0]))
        rospy.loginfo("POLE ANGLE==" + str(state[2]))
        if (self.min_base_position >= state[0] or state[0] >= self.max_base_position): #check if the base is still within the ranges of (-2, 2)
            rospy.logerr("Base Ouside Limits==>min="+str(self.min_base_position)+",pos="+str(state[0])+",max="+str(self.max_base_position))
            done = True
        if (self.min_pole_angle >= state[2] or state[2] >= self.max_pole_angle): #check if pole has toppled over
            rospy.logerr(
                "Pole Angle Ouside Limits==>min=" + str(self.min_pole_angle) + ",pos=" + str(state[2]) + ",max=" + str(
                    self.max_pole_angle))
            done = True

        observations = [state[2]]

        return observations, done, state

    def get_reward_for_observations(self, state):
        """
        Gives more points for staying upright, gets data from given observations to avoid
        having different data than other previous functions
        :return:reward
        """

        pole_angle = state[2]
        pole_vel = state[3]

        rospy.logwarn("pole_angle for reward==>" + str(pole_angle))
        delta = 0.7 - abs(pole_angle)
        reward_pole_angle = math.exp(delta*10)

        # If we are moving to the left and the pole is falling left is Bad
        rospy.logwarn("pole_vel==>" + str(pole_vel))
        pole_vel_sign = numpy.sign(pole_vel)
        pole_angle_sign = numpy.sign(pole_angle)
        rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign))
        rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign))

        # We want inverted signs for the speeds. We multiply by -1 to make minus positive.
        # global_sign + = GOOD, global_sign - = BAD
        base_reward = 500
        if pole_vel != 0:
            global_sign = pole_angle_sign * pole_vel_sign * -1
            reward_for_efective_movement = base_reward * global_sign
        else:
            # Is a particular case. If it doesnt move then its good also
            reward_for_efective_movement = base_reward

        reward = reward_pole_angle + reward_for_efective_movement

        rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement))
        return reward


    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._base_pub.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.loginfo("No susbribers to _base_pub yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.loginfo("_base_pub Publisher Connected")

    def check_all_systems_ready(self, init=True):
        self.base_position = None
        while self.base_position is None and not rospy.is_shutdown():
            try:
                self.base_position = rospy.Subscriber("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                rospy.logdebug("Current /cart_pole_3d/joint_states READY=>"+str(self.base_position))
                if init:
                    # We Check all the sensors are in their initial values
                    positions_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.position)
                    velocity_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.velocity)
                    efforts_ok = all(abs(i) <= 1.0e-01 for i in self.base_position.effort)
                    base_data_ok = positions_ok and velocity_ok and efforts_ok
                    rospy.logdebug("Checking Init Values Ok=>" + str(base_data_ok))
            except:
                rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")
        rospy.logdebug("ALL SYSTEMS READY")

    def check_joint_states_ready(self):
        self.base_position = None
        while self.base_position is None and not rospy.is_shutdown():
            try:
                self.base_position = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                # check response from this topic for 1 second. if don't received respone, it mean not ready.
                # Assure data channels are working perfectly.
                rospy.logdebug("Current /cart_pole_3d/joint_states READY=>" + str(self.base_position))

            except:
                rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")


    def move_joints(self, joints_array):
        joint_value = Float64()
        joint_value.data = joints_array[0]
        rospy.logdebug("Single Base JointsPos>>"+str(joint_value))
        self._base_pub.publish(joint_value)


    def set_init_pose(self):
        """
        Sets joints to initial position [0,0,0]
        :return:
        """
        self.check_publishers_connection()
        # Reset Internal pos variable
        self.init_internal_vars(self.init_pos)
        self.move_joints(self.pos)
class DiscreteQuadCopterEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.pos_pub = rospy.Publisher('/drone/cmd_pos', Point, queue_size=1)
        self.vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=5)
        self.takeoff_pub = rospy.Publisher('/drone/takeoff',
                                           EmptyTopicMsg,
                                           queue_size=0)
        self.switch_pub = rospy.Publisher('/drone/posctrl', Bool, queue_size=1)
        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")

        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.minx = rospy.get_param("/limits/min_x")
        self.maxx = rospy.get_param("/limits/max_x")
        self.miny = rospy.get_param("/limits/min_y")
        self.maxy = rospy.get_param("/limits/max_y")
        #self.max_altitude = rospy.get_param("/limits/max_altitude")
        #self.min_altitude = rospy.get_param("/limits/min_altitude")
        self.shape = (10, 10)

        self.incx = (self.maxx - self.minx) / (self.shape[0])
        self.incy = (self.maxy - self.miny) / (self.shape[1])
        #self.incz = (self.max_altitude - self.min_altitude)/ self.shape[0]

        self.horizontal_bins = np.zeros((2, self.shape[1]))
        #self.vertical_bin = np.zeros((self.shape[0]))

        self.horizontal_bins[0] = np.linspace(self.minx, self.maxx,
                                              self.shape[1])
        self.horizontal_bins[1] = np.linspace(self.miny, self.maxy,
                                              self.shape[1])
        #self.vertical_bin = np.linspace(self.min_altitude,self.max_altitude,self.shape[0])
        self.goal = np.zeros(2)
        #self.goal[0] = int(np.digitize(self.desired_pose.position.z,self.vertical_bin))
        self.goal[0] = int(
            np.digitize(self.desired_pose.position.x, self.horizontal_bins[0]))
        self.goal[1] = int(
            np.digitize(self.desired_pose.position.y, self.horizontal_bins[1]))
        rospy.loginfo("Goal: %s" % (self.goal))
        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.action_space = spaces.Discrete(4)  #Forward,Backward,Left,Right
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        self.init_desired_pose()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()

        self.takeoff_sequence()
        self.switch_position_control()
        # 4th: takes an observation of the initial condition of the robot
        data_pose, data_imu = self.take_observation()
        observation = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]

        return observation

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        pose, _ = self.take_observation()
        rospy.loginfo("Observation has taken")
        pos_cmd = pose.position
        pos_cmd.z = 1.0
        if action == FORWARD:
            pos_cmd.x += self.incx
        elif action == BACKWARD:
            pos_cmd.x -= self.incx
        elif action == RIGHT:
            pos_cmd.y += self.incy
        elif action == LEFT:
            pos_cmd.y -= self.incy
        #elif action == UP:
        #    pos_cmd.z += self.incz
        #elif action == DOWN:
        #    pos_cmd.z -= self.incz
        # Then we send the command to the robot and let it go
        # for running_step seconds
        #self.gazebo.unpauseSim()
        self.pos_pub.publish(pos_cmd)
        time.sleep(self.running_step)
        data_pose, data_imu = self.take_observation()
        #self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(data_pose, data_imu)
        state = [data_pose.position.x, data_pose.position.y]
        return state, reward, done, {}

    def _render(self, mode, close=True):
        pass

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose = rospy.wait_for_message('/drone/gt_pose',
                                                   Pose,
                                                   timeout=5)
            except:
                pass

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/drone/imu', Imu, timeout=5)
            except:
                pass

        return data_pose, data_imu

    def calculate_dist_between_two_points(self, p_init, p_end):
        a = np.array((p_init.x, p_init.y, p_init.z))
        b = np.array((p_end.x, p_end.y, p_end.z))

        dist = np.linalg.norm(a - b)

        return dist

    def init_desired_pose(self):

        current_init_pose, imu = self.take_observation()

        self.best_dist = self.calculate_dist_between_two_points(
            current_init_pose.position, self.desired_pose.position)

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz
        while (self.takeoff_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No subscribers to Takeoff yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Takeoff Publisher Connected")

        while (self.vel_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No subscribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.x = 0.0
        vel_cmd.linear.y = 0.0
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)

    def switch_position_control(self):
        msg = Bool()
        msg.data = True
        self.switch_pub.publish(msg)
        rospy.loginfo("Switched to position control")

    def takeoff_sequence(self, seconds_taking_off=2):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_cmd_vel_commands()

        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo("Taking-Off Start")
        pos_cmd = Point()
        pos_cmd.z = 1.0
        self.pos_pub.publish(pos_cmd)
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        rospy.loginfo("Taking-Off sequence completed")

    def process_data(self, data_position, data_imu):
        done = False
        euler = tf.transformations.euler_from_quaternion([
            data_imu.orientation.x, data_imu.orientation.y,
            data_imu.orientation.z, data_imu.orientation.w
        ])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        state = np.zeros(2)
        #state[0] = int(np.digitize(data_position.position.z,self.vertical_bin))# z first
        state[0] = int(
            np.digitize(data_position.position.x, self.horizontal_bins[0]))
        state[1] = int(
            np.digitize(data_position.position.y, self.horizontal_bins[1]))
        #invalid_altitude = state[0] == 0 or state[0] == self.shape[0]-1
        if tuple(state) == tuple(self.goal):
            done = True
            reward = 0
        #elif invalid_altitude:
        #    reward = -50000 # Punish hardly
        #    done = True
        else:
            reward = -1
        return reward, done
예제 #11
0
class QuadCopterEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.vel_pub = rospy.Publisher('/drone_1/cmd_vel', Twist, queue_size=5)
        # self.takeoff_pub = rospy.Publisher('/drone/takeoff', EmptyTopicMsg, queue_size=0)

        # gets training parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.action_space = spaces.Discrete(5)  #Forward,Left,Right,Up,Down
        self.reward_range = (-np.inf, np.inf)

        self._seed()

        self.goal_pose = [1, 1, 1]
        self.goal_threshold = 0.05
        self.goal_reward = 50
        self.prev_state = []

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.init_desired_pose()
        self.takeoff_sequence()

        # 4th: takes an observation of the initial condition of the robot
        data_pose, imu_pose = self.take_observation()
        observation = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]
        self.prev_state = observation

        # 5th: pauses simulation
        self.gazebo.pauseSim()

        return observation

    def step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, we decide which velocity command corresponds
        vel_cmd = Twist()
        vel_cmd.linear.x = action[0]
        vel_cmd.linear.y = action[1]
        vel_cmd.linear.z = action[2]
        """if action == 0: #FORWARD
            vel_cmd.linear.x = self.speed_value
            vel_cmd.angular.z = 0.0
        elif action == 1: #LEFT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = self.speed_value
        elif action == 2: #RIGHT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = -self.speed_value
        elif action == 3: #Up
            vel_cmd.linear.z = self.speed_value
            vel_cmd.angular.z = 0.0
        elif action == 4: #Down
            vel_cmd.linear.z = -self.speed_value
            vel_cmd.angular.z = 0.0"""

        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.gazebo.unpauseSim()
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step)
        data_pose, data_imu = self.take_observation()
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(data_pose)

        # Promote going forwards instead if turning

        state = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]
        self.prev_state = state
        return state, reward, done, {}

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose_raw = rospy.wait_for_message(
                    '/drone_1/ground_truth_to_tf/pose',
                    PoseStamped,
                    timeout=10)
                data_pose = data_pose_raw.pose  # Equals to pose_ in rohit-s-murthy's code
            except:
                rospy.loginfo(
                    "Current drone pose not ready yet, retrying for getting robot pose"
                )

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/drone_1/raw_imu',
                                                  Imu,
                                                  timeout=10)
            except:
                rospy.loginfo(
                    "Current drone imu not ready yet, retrying for getting robot imu"
                )

        return data_pose, data_imu

    def init_desired_pose(self):

        current_init_pose, current_init_imu = self.take_observation()
        """self.best_dist = self.calculate_dist_between_two_Points(current_init_pose.position, self.desired_pose.position)"""

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz

        while (self.vel_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)

    def takeoff_sequence(self, seconds_taking_off=1):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_cmd_vel_commands()

        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo("Taking-Off Start")
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        rospy.loginfo("Taking-Off sequence completed")

    def process_data(self, data_pose):

        done = False
        """euler = tf.transformations.euler_from_quaternion([data_imu.orientation.x,data_imu.orientation.y,data_imu.orientation.z,data_imu.orientation.w])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        pitch_bad = not(-self.max_incl < pitch < self.max_incl)
        roll_bad = not(-self.max_incl < roll < self.max_incl)
        altitude_bad = data_position.position.z > self.max_altitude

        if altitude_bad or pitch_bad or roll_bad:
            rospy.loginfo ("(Drone flight status is wrong) >>> ("+str(altitude_bad)+","+str(pitch_bad)+","+str(roll_bad)+")")
            done = True
            reward = -200
        else:
            reward, reached_goal = self.get_reward(data_pose)
            if reached_goal:
                print('Reached Goal!')
                done = True"""

        reward, reached_goal = self.get_reward(data_pose)
        if reached_goal:
            print('Reached Goal!')
            done = True

        return reward, done

    # Now the reward just related to the current_pose and goal
    def get_reward(self, data_pose):
        reward = 0
        reached_goal = False

        error = self._distance(data_pose)
        current_pose = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]

        if error < self.goal_threshold:
            reward += self.goal_reward
            reached_goal = True
        else:
            reward += np.linalg.norm(
                np.subtract(self.prev_state, self.goal_pose)) - np.linalg.norm(
                    np.subtract(current_pose, self.goal_pose))

        return reward, reached_goal

    # Calculate the distance
    def _distance(self, data_pose):
        current_pose = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]

        err = np.subtract(current_pose, self.goal_pose)
        w = np.array([1, 1, 4])
        err = np.multiply(w, err)
        dist = np.linalg.norm(err)
        return dist
예제 #12
0
class QuadCopterEnv(gym.Env):
    def __init__(self):

        self.running_step = 0.3
        self.mode = ''
        self.count = 0
        self.current_init_pose = PoseStamped()

        self.desired_pose = PoseStamped()
        self.desired_pose.pose.position.x = 6
        self.desired_pose.pose.position.y = 10
        self.desired_pose.pose.position.z = 5

        self.gazebo = GazeboConnection()
        self.arming = armtakeoff()
        self.data_pose = PoseStamped()

        self.vel_pub = rospy.Publisher('/mavros/setpoint_velocity/cmd_vel',
                                       TwistStamped,
                                       queue_size=20)
        self.action_space = spaces.Discrete(5)
        self.reward_range = (-np.inf, np.inf)
        self.seed()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        #self.control_snake.respawn()
        self.gazebo.resetSim()

        self.gazebo.unpauseSim()

        self.arming.disarm()

        self.gazebo.resetSim()

        self.init_desired_pose()

        self.arming.arm()
        self.take_observation()
        observation = np.array([
            self.data_pose.pose.position.x, self.data_pose.pose.position.y,
            self.data_pose.pose.position.z,
            self.calculate_dist_between_two_Points()
        ])
        observation = np.float32(observation)
        self.gazebo.unpauseSim()
        return observation

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, we decide which velocity command corresponds
        vel_cmd = TwistStamped()
        if action == 0:  #FORWARD
            vel_cmd.twist.linear.x = 3
        elif action == 1:  #Back
            vel_cmd.twist.linear.x = -3
        elif action == 2:  #LEFT
            vel_cmd.twist.linear.y = 3
        elif action == 3:  #RIGHT
            vel_cmd.twist.linear.y = -3
        elif action == 4:  #Up
            vel_cmd.twist.linear.z = 3
        elif action == 5:  #Down
            vel_cmd.twist.linear.z = -3

        self.gazebo.unpauseSim()
        self.vel_pub.publish(vel_cmd)
        self.take_observation()

        reward, done = self.process_data(self)
        reward = (reward - 100) * 2
        state = np.array([
            self.data_pose.pose.position.x, self.data_pose.pose.position.y,
            self.data_pose.pose.position.z,
            self.calculate_dist_between_two_Points()
        ])
        state = np.float32(state)
        return state, reward, done, {}

    def take_observation(self):
        def current_pos_callback(position):
            self.data_pose = position

        rospy.Subscriber('mavros/local_position/pose', PoseStamped,
                         current_pos_callback)

    def calculate_dist_between_two_Points(self):
        s1x = self.data_pose.pose.position.x
        s1y = self.data_pose.pose.position.y
        s1z = self.data_pose.pose.position.z
        p2x = self.desired_pose.pose.position.x
        p2y = self.desired_pose.pose.position.y
        p2z = self.desired_pose.pose.position.z
        a = np.array((s1x, s1y, s1z))
        b = np.array((p2x, p2y, p2z))

        dist = np.linalg.norm(a - b)
        return dist

    def init_desired_pose(self):

        self.current_init_pose.pose.position.x = self.data_pose.pose.position.x
        self.current_init_pose.pose.position.y = self.data_pose.pose.position.y
        self.current_init_pose.pose.position.z = self.data_pose.pose.position.z

    def improved_distance_reward(self, current_pose):

        current_dist = self.calculate_dist_between_two_Points()

        reward = -1 * current_dist + 20
        return reward

    def process_data(self, data_position):

        done = False

        if self.desired_pose == self.data_pose:
            reward = 10000
        else:
            reward = self.improved_distance_reward(data_position)

        return reward, done
예제 #13
0
class QuadCopterEnv(gym.Env):

    def __init__(self):
        
        # We assume that a ROS node has already been created
        # before initialising the environment
        
        self.vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=50)
        self.takeoff_pub = rospy.Publisher('/ardrone/takeoff', EmptyTopicMsg, queue_size=0)
        
        # gets training parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.rotation_speed_value = rospy.get_param("/rotation_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")
        self.max_distance = rospy.get_param("/max_distance")
        
        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        high = np.array([
         1])
        self.action_space = spaces.Discrete(3) #noop, up down, rot cw, rot ccw
        self.observation_space = spaces.Box(np.array([-1]), np.array([1]))
        self.reward_range = (-np.inf, np.inf)

        self.seed()

    # A function to initialize the random generator
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def render(self, mode=None):
        None
    # Resets the state of the environment and returns an initial observation.
    def reset(self):
        
        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()
        vel_cmd = Twist()
        vel_cmd.linear.x = 0.0
        vel_cmd.linear.y = 0.0
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0


        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()
        self.vel_pub.publish(vel_cmd)

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.init_desired_pose()
        self.takeoff_sequence()
        rotation = [0,0,0]
        # 4th: takes an observation of the initial condition of the robot
        data_pose, self.data_imu = self.take_observation()
        # observation = [self.roundTo(data_pose.position.x,100)-self.roundTo(self.desired_pose.position.x,100), 
                #  self.roundTo(data_pose.position.y,100)-self.roundTo(self.desired_pose.position.y,100), 
                #  self.roundTo(data_pose.position.z,100)-self.roundTo(self.desired_pose.position.z,100), 

                 #self.roundTo(self.desired_pose.position.x,100), 
                 #self.roundTo(self.desired_pose.position.y,100), 
                 #self.roundTo(self.desired_pose.position.z,100),

                #  self.roundTo(rotation[0], 100),
                #  self.roundTo(rotation[1], 100),
                #  self.roundTo(rotation[2], 100)
                #  ]
        #observation = [data_pose.position.z - self.desired_pose.position.z, data_pose.position.z, self.data_imu.orientation.z]
        observation = [self.data_imu.orientation.z]
        # 5th: pauses simulation
        self.gazebo.pauseSim()

        with open("/root/catkin_ws/src/position.csv", "a") as myfile:
            myfile.write("reset\n")
        with open("/root/catkin_ws/src/distance.csv", "a") as myfile:
            myfile.write("reset\n")
        self.simulationStartTime = rospy.get_time()
        self.realStarttime = time.time();
        self.simulationStep = 0
        return observation

    def step(self, action):
        self.simulationStep += 1
        print("Action->" + str(action))

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot
        
        # 1st, we decide which velocity command corresponds
        vel_cmd = Twist()
        vel_cmd.linear.x = 0.0
        vel_cmd.linear.y = 0.0
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0

        if action == 0: # noop
            pass
        elif action == 1: # ccw
            if self.data_imu.orientation.z < 0.9:
                vel_cmd.angular.z = self.rotation_speed_value
            else:
                rospy.loginfo("Could not rotate ccw because orientation: {}".format([self.data_imu.orientation.z]))
        elif action == 2: # cw
            if self.data_imu.orientation.z > -0.9:
                vel_cmd.angular.z = -self.rotation_speed_value
            else:
                rospy.loginfo("Could not rotate cw because orientation: {}".format([self.data_imu.orientation.z]))

        # Then we send the command to the robot and let it go
        # for running_step seconds
        if (rospy.get_time() - self.simulationStartTime) > 0:
            rate = (time.time() - self.realStarttime) / (rospy.get_time() - self.simulationStartTime)
        else:
            rate = 0.1
        self.gazebo.unpauseSim()
        
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step * rate)
        data_pose, self.data_imu = self.take_observation()
        self.reset_cmd_vel_commands()
        
        self.gazebo.pauseSim()

        if data_pose.position.z < 1:
            self.up()

        # finally we get an evaluation based on what happened in the sim
        reward,done, rotation_distance = self.process_data(data_pose, self.data_imu)

        state = [rotation_distance]
        #print "after step state"
        print (state)
        print (reward)
        with open("/root/catkin_ws/src/position.csv", "a") as myfile:
            myfile.write("rot:{}\n".format(state[0] ))
        return state, reward, done, {}

    def up(self):
        if (rospy.get_time() - self.simulationStartTime) > 0:
            rate = (time.time() - self.realStarttime) / (rospy.get_time() - self.simulationStartTime)
        else:
            rate = 0.1

        vel_cmd = Twist()
        vel_cmd.linear.x = 0.0
        vel_cmd.linear.y = 0.0
        vel_cmd.linear.z = 0.5
        vel_cmd.angular.z = 0.0

        self.gazebo.unpauseSim()
        
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step * rate)
        self.reset_cmd_vel_commands()
        
        self.gazebo.pauseSim()

    def roundTo(self,data,to):
        return int(data * to)

    def take_observation (self):
        data_pose = None
        while data_pose is None:
            try:
#                data_pose = rospy.wait_for_message('/drone/gt_pose', Pose, timeout=5)
                data_pose = rospy.wait_for_message('/ground_truth/state', Odometry, timeout=5)
                data_pose = data_pose.pose.pose
            except Exception as e:
                None
                #rospy.loginfo("Current drone pose not ready yet, retrying for getting robot pose {0}".format(e))

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/ardrone/imu', Imu, timeout=5)
            except:
                None
                #rospy.loginfo("Current drone imu not ready yet, retrying for getting robot imu")
        
        return data_pose, data_imu

    def calculate_dist_between_two_Points(self,p_init,p_end):
        a = np.array((p_init.x ,p_init.y, p_init.z))
        b = np.array((p_end.x ,p_end.y, p_end.z))
        
        dist = np.linalg.norm(a-b)
        
        return dist


    def init_desired_pose(self):
        current_init_pose, self.data_imu = self.take_observation()
        self.best_dist = self.calculate_dist_between_two_Points(current_init_pose.position, self.desired_pose.position)

    def check_topic_publishers_connection(self):
        
        rate = rospy.Rate(1) # 10hz
        while(self.takeoff_pub.get_num_connections() == 0):
            rospy.loginfo("No susbribers to Takeoff yet so we wait and try again")
            rate.sleep();
        rospy.loginfo("Takeoff Publisher Connected")

        while(self.vel_pub.get_num_connections() == 0):
            rospy.loginfo("No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep();
        rospy.loginfo("Cmd_vel Publisher Connected")
        

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)


    def takeoff_sequence(self, seconds_taking_off=0.1):
        rospy.loginfo("Takeoff sequence starting")
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_cmd_vel_commands()
        rospy.loginfo("Takeoff reset cmd_vel")
        
        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo( "Taking-Off Start")
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        rospy.loginfo( "Taking-Off sequence completed")
        

    def improved_distance_reward(self, current_pose, current_orientation):
        # current_dist = self.calculate_dist_between_two_Points(current_pose.position, self.desired_pose.position)
        current_dist = current_pose.position.z - self.desired_pose.position.z
        x = self.desired_pose.position.x - current_pose.position.x
        y = self.desired_pose.position.y - current_pose.position.y
        desired_angle = math.atan2(x, y)
        current_rotation_distance = current_orientation.z 
        rospy.loginfo( "rotation distance: " + str(current_rotation_distance))
        #rospy.loginfo("Calculated Distance = "+str(current_dist))
        
        self.speed_value = abs(current_dist) / 5
        self.rotation_speed_value = abs(current_rotation_distance)

        # Reward:
        reward = (100 - 100 * abs(current_rotation_distance)) ** 2
        
        return reward, current_dist, current_rotation_distance
        
    def process_data(self, data_position, data_imu):

        done = False
        
        euler = tf.transformations.euler_from_quaternion([data_imu.orientation.x,data_imu.orientation.y,data_imu.orientation.z,data_imu.orientation.z])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]
        #print "Yaw->" + str(yaw)

        pitch_bad = not(-self.max_incl < pitch < self.max_incl)
        roll_bad = not(-self.max_incl < roll < self.max_incl)
        altitude_bad = data_position.position.z > self.max_altitude  or data_position.position.z < 0.05

        if altitude_bad or pitch_bad:
            rospy.loginfo ("(Drone flight status is wrong) >>> ("+str(altitude_bad)+","+str(pitch_bad)+","+str(roll_bad)+")")
            rospy.loginfo ("(Drone flight status is wrong) >>> ("+str(data_position.position.z)+","+str(pitch)+","+str(roll)+")")
            done = True
            reward = -2000000
            rotation_distance = 0
        else:
            reward, distance, rotation_distance = self.improved_distance_reward(data_position, data_imu.orientation)
            with open("/root/catkin_ws/src/distance.csv", "a") as myfile:
                myfile.write("{}, {}\n".format(distance, rotation_distance))

            if (distance > self.max_distance):
                done = True

        return reward, done, rotation_distance
class SafeSAREnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.pos_pub = rospy.Publisher('/drone/cmd_pos', Point, queue_size=1)
        self.takeoff_pub = rospy.Publisher('/drone/takeoff',
                                           EmptyTopicMsg,
                                           queue_size=0)
        self.switch_pub = rospy.Publisher('/drone/posctrl', Bool, queue_size=1)
        self.del_model_client = rospy.ServiceProxy('gazebo/delete_model',
                                                   DeleteModel)
        self.spawn_model_client = rospy.ServiceProxy('/gazebo/spawn_sdf_model',
                                                     SpawnModel)

        self.running_step = rospy.get_param("/running_step")
        self.minx = rospy.get_param("/limits/min_x")
        self.maxx = rospy.get_param("/limits/max_x")
        self.miny = rospy.get_param("/limits/min_y")
        self.maxy = rospy.get_param("/limits/max_y")
        self.base = Point()
        self.base.x = rospy.get_param("/base/x")
        self.base.y = rospy.get_param("/base/y")
        self.base.z = rospy.get_param("/base/z")
        self.gazebo = GazeboConnection()
        # Get number of survivors
        self.survivors = []
        self.get_survivor_information()
        self.rescued = np.full((len(self.survivors)), False)
        # All of the survivors + return to base.
        self.action_space = spaces.Discrete(len(self.survivors) + 1)
        self.num_actions = len(self.survivors) + 1
        self.reward_range = (-np.inf, np.inf)

        self.battery = 100
        self.battery_depletion = rospy.get_param("/battery_depletion")
        self.battery_timer = rospy.Timer(rospy.Duration(2),
                                         self.battery_timer_callback)
        area_limits_high = np.array([
            self.maxx,  #x
            self.maxy,  #y
            100  #battery
        ])
        area_limits_low = np.array([self.minx, self.miny, 0])
        self.observation_space = spaces.Box(area_limits_low, area_limits_high)
        self._seed()
        file_location = roslib.packages.get_pkg_dir(
            'sjtu_drone') + '/models/person_standing/model.sdf'
        file_xml = open(file_location)
        self.person_standing = file_xml.read()
        self.first_reset = True

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.reset_battery()

        #if self.first_reset == False:
        #    req = SpawnModelRequest()
        #    for i in range(len(self.survivors)):
        #        if self.rescued[i]:
        #            req.model_name = self.survivors[i][0] # model name from command line input
        #            req.model_xml = self.person_standing
        #            req.initial_pose = self.survivors[i][1]
        #            res = self.spawn_model_client(req)
        self.rescued = np.full((len(self.survivors)), False)
        self.takeoff_sequence()
        self.switch_position_control()
        # 4th: takes an observation of the initial condition of the robot
        data_pose, data_imu = self.take_observation()
        observation = [
            data_pose.position.x, data_pose.position.y, self.battery
        ]
        self.first_reset = False
        return observation

    def step(self, action):

        # Action selection means the rescuing of the survivor.
        pos_cmd = Point()
        if action < len(self.survivors):
            rospy.loginfo("Chosen to rescue %dth survivor" % action)
            pos_cmd.z = 5.0
            pos_cmd.x = self.survivors[action][1].position.x
            pos_cmd.y = self.survivors[action][1].position.y
        else:
            rospy.loginfo("Chosen to return to base..")
            pos_cmd = self.base
        data_pose, _ = self.take_observation()
        dist = self.calculate_dist_between_two_points(data_pose.position,
                                                      pos_cmd)
        while dist > 0.2 and self.battery > 5.0:
            self.pos_pub.publish(pos_cmd)
            rospy.sleep(self.running_step)
            data_pose, _ = self.take_observation()
            dist = self.calculate_dist_between_two_points(
                data_pose.position, pos_cmd)
        rospy.sleep(self.running_step)
        if action < len(
                self.rescued):  # Strictly small means it is indeed a survivor.
            self.rescued[action] = True
            #rospy.loginfo("Removing survivor %s"%self.survivors[action][0])
            #self.del_model_client(self.survivors[action][0])

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(data_pose)
        state = [data_pose.position.x, data_pose.position.y, self.battery]
        return state, reward, done, {}

    def _render(self, mode, close=True):
        pass

    def get_survivor_information(self):
        all_models = None
        while all_models is None:
            try:
                all_models = rospy.wait_for_message('/gazebo/model_states',
                                                    ModelStates)
            except:
                pass
        for i in range(len(all_models.name)):
            if all_models.name[i].startswith('survivor'):
                self.survivors.append((all_models.name[i], all_models.pose[i]))

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose = rospy.wait_for_message('/drone/gt_pose',
                                                   Pose,
                                                   timeout=5)
            except:
                pass

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/drone/imu', Imu, timeout=5)
            except:
                pass

        return data_pose, data_imu

    def calculate_dist_between_two_points(self, p_init, p_end):
        a = np.array((p_init.x, p_init.y, p_init.z))
        b = np.array((p_end.x, p_end.y, p_end.z))

        dist = np.linalg.norm(a - b)

        return dist

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz
        while (self.takeoff_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No subscribers to Takeoff yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Takeoff Publisher Connected")

    def switch_position_control(self):
        msg = Bool()
        msg.data = True
        self.switch_pub.publish(msg)
        rospy.loginfo("Switched to position control")

    def takeoff_sequence(self, seconds_taking_off=2):

        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo("Taking-Off Start")
        pos_cmd = Point()
        pos_cmd.z = 5.0
        self.pos_pub.publish(pos_cmd)
        self.takeoff_pub.publish(takeoff_msg)
        rospy.sleep(seconds_taking_off)
        rospy.loginfo("Taking-Off sequence completed")

    def reset_battery(self):
        self.battery_timer.shutdown()
        self.battery_timer = rospy.Timer(rospy.Duration(2),
                                         self.battery_timer_callback)
        self.battery = 100

    def battery_timer_callback(self, event):
        self.battery -= self.battery_depletion
        rospy.loginfo("Battery: %lf", self.battery)

    def process_data(self, pose):
        done = False

        if np.all(self.rescued) and self.calculate_dist_between_two_points(
                pose.position, self.base) < 0.5:
            done = True
            reward = self.battery
        elif self.battery < 5:  # Not all of the survivors have been rescued and battery depleted too much. Punish harshly.
            reward = -1000
            done = True
        else:
            reward = -20
        return reward, done
예제 #15
0
class URSimReaching(robot_gazebo_env_goal.RobotGazeboEnv):
    def __init__(self):
        rospy.logdebug("Starting URSimReaching Class object...")

        # Init GAZEBO Objects
        self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state',
                                                SetModelState)
        self.get_world_state = rospy.ServiceProxy(
            '/gazebo/get_world_properties', GetWorldProperties)

        # Subscribe joint state and target pose
        rospy.Subscriber("/joint_states", JointState,
                         self.joints_state_callback)
        rospy.Subscriber("/target_blocks_pose", Point, self.target_point_cb)

        # Gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.running_step = rospy.get_param("/running_step")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.observations = rospy.get_param("/observations")

        # Joint limitation
        shp_max = rospy.get_param("/joint_limits_array/shp_max")
        shp_min = rospy.get_param("/joint_limits_array/shp_min")
        shl_max = rospy.get_param("/joint_limits_array/shl_max")
        shl_min = rospy.get_param("/joint_limits_array/shl_min")
        elb_max = rospy.get_param("/joint_limits_array/elb_max")
        elb_min = rospy.get_param("/joint_limits_array/elb_min")
        wr1_max = rospy.get_param("/joint_limits_array/wr1_max")
        wr1_min = rospy.get_param("/joint_limits_array/wr1_min")
        wr2_max = rospy.get_param("/joint_limits_array/wr2_max")
        wr2_min = rospy.get_param("/joint_limits_array/wr2_min")
        wr3_max = rospy.get_param("/joint_limits_array/wr3_max")
        wr3_min = rospy.get_param("/joint_limits_array/wr3_min")
        self.joint_limits = {
            "shp_max": shp_max,
            "shp_min": shp_min,
            "shl_max": shl_max,
            "shl_min": shl_min,
            "elb_max": elb_max,
            "elb_min": elb_min,
            "wr1_max": wr1_max,
            "wr1_min": wr1_min,
            "wr2_max": wr2_max,
            "wr2_min": wr2_min,
            "wr3_max": wr3_max,
            "wr3_min": wr3_min
        }

        shp_init_value = rospy.get_param("/init_joint_pose/shp")
        shl_init_value = rospy.get_param("/init_joint_pose/shl")
        elb_init_value = rospy.get_param("/init_joint_pose/elb")
        wr1_init_value = rospy.get_param("/init_joint_pose/wr1")
        wr2_init_value = rospy.get_param("/init_joint_pose/wr2")
        wr3_init_value = rospy.get_param("/init_joint_pose/wr3")
        self.init_joint_pose = [
            shp_init_value, shl_init_value, elb_init_value, wr1_init_value,
            wr2_init_value, wr3_init_value
        ]

        # Fill in the Done Episode Criteria list
        self.episode_done_criteria = rospy.get_param("/episode_done_criteria")

        # stablishes connection with simulator
        self._gz_conn = GazeboConnection()
        self._ctrl_conn = ControllersConnection(namespace="")

        # Controller type for ros_control
        self._ctrl_type = sys.argv[1]
        self.pre_ctrl_type = self._ctrl_type

        # We init the observations
        self.base_orientation = Quaternion()
        self.target_point = Point()
        self.joints_state = JointState()

        # Arm/Control parameters
        self._ik_params = setups['UR5_6dof']['ik_params']

        # ROS msg type
        self._joint_pubisher = JointPub()
        self._joint_traj_pubisher = JointTrajPub()

        # Gym interface and action
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)
        self._seed()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def target_point_cb(self, msg):
        self.target_point = msg

    def check_all_systems_ready(self):
        """
        We check that all systems are ready
        :return:
        """
        joint_states_msg = None
        while joint_states_msg is None and not rospy.is_shutdown():
            try:
                joint_states_msg = rospy.wait_for_message("/joint_states",
                                                          JointState,
                                                          timeout=0.1)
                self.joints_state = joint_states_msg
                rospy.logdebug("Current joint_states READY")
            except Exception as e:
                rospy.logdebug(
                    "Current joint_states not ready yet, retrying==>" + str(e))

        target_pose_msg = None
        while target_pose_msg is None and not rospy.is_shutdown():
            try:
                target_pose_msg = rospy.wait_for_message("/target_blocks_pose",
                                                         Point,
                                                         timeout=0.1)
                self.target_point = target_pose_msg
                rospy.logdebug("Reading target pose READY")
            except Exception as e:
                rospy.logdebug(
                    "Reading target pose not ready yet, retrying==>" + str(e))

        rospy.logdebug("ALL SYSTEMS READY")

    def get_xyz(self, q):
        """Get x,y,z coordinates 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        return xyz

    def get_orientation(self, q):
        """Get Euler angles 
        Args:
            q: a numpy array of joints angle positions.
        Returns:
            xyz are the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        orientation = mat[0:3, 0:3]
        roll = -orientation[1, 2]
        pitch = orientation[0, 2]
        yaw = -orientation[0, 1]

        return Vector3(roll, pitch, yaw)

    def cvt_quat_to_euler(self, quat):
        euler_rpy = Vector3()
        euler = tf.transformations.euler_from_quaternion(
            [self.quat.x, self.quat.y, self.quat.z, self.quat.w])

        euler_rpy.x = euler[0]
        euler_rpy.y = euler[1]
        euler_rpy.z = euler[2]
        return euler_rpy

    def init_joints_pose(self, init_pos):
        """
        We initialise the Position variable that saves the desired position where we want our
        joints to be
        :param init_pos:
        :return:
        """
        self.current_joint_pose = []
        self.current_joint_pose = copy.deepcopy(init_pos)
        return self.current_joint_pose

    def get_euclidean_dist(self, p_in, p_pout):
        """
        Given a Vector3 Object, get distance from current position
        :param p_end:
        :return:
        """
        a = numpy.array((p_in.x, p_in.y, p_in.z))
        b = numpy.array((p_pout.x, p_pout.y, p_pout.z))

        distance = numpy.linalg.norm(a - b)

        return distance

    def joints_state_callback(self, msg):
        self.joints_state = msg

    def get_observations(self):
        """
        Returns the state of the robot needed for OpenAI QLearn Algorithm
        The state will be defined by an array
        :return: observation
        """
        joint_states = self.joints_state
        shp_joint_ang = joint_states.position[0]
        shl_joint_ang = joint_states.position[1]
        elb_joint_ang = joint_states.position[2]
        wr1_joint_ang = joint_states.position[3]
        wr2_joint_ang = joint_states.position[4]
        wr3_joint_ang = joint_states.position[5]

        shp_joint_vel = joint_states.velocity[0]
        shl_joint_vel = joint_states.velocity[1]
        elb_joint_vel = joint_states.velocity[2]
        wr1_joint_vel = joint_states.velocity[3]
        wr2_joint_vel = joint_states.velocity[4]
        wr3_joint_vel = joint_states.velocity[5]

        q = [
            shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang,
            wr2_joint_ang, wr3_joint_ang
        ]
        eef_x, eef_y, eef_z = self.get_xyz(q)

        observation = []
        rospy.logdebug("List of Observations==>" + str(self.observations))
        for obs_name in self.observations:
            if obs_name == "shp_joint_ang":
                observation.append(shp_joint_ang)
            elif obs_name == "shl_joint_ang":
                observation.append(shl_joint_ang)
            elif obs_name == "elb_joint_ang":
                observation.append(elb_joint_ang)
            elif obs_name == "wr1_joint_ang":
                observation.append(wr1_joint_ang)
            elif obs_name == "wr2_joint_ang":
                observation.append(wr2_joint_ang)
            elif obs_name == "wr3_joint_ang":
                observation.append(wr3_joint_ang)
            elif obs_name == "shp_joint_vel":
                observation.append(shp_joint_vel)
            elif obs_name == "shl_joint_vel":
                observation.append(shl_joint_vel)
            elif obs_name == "elb_joint_vel":
                observation.append(elb_joint_vel)
            elif obs_name == "wr1_joint_vel":
                observation.append(wr1_joint_vel)
            elif obs_name == "wr2_joint_vel":
                observation.append(wr2_joint_vel)
            elif obs_name == "wr3_joint_vel":
                observation.append(wr3_joint_vel)
            elif obs_name == "eef_x":
                observation.append(eef_x)
            elif obs_name == "eef_y":
                observation.append(eef_y)
            elif obs_name == "eef_z":
                observation.append(eef_z)
            else:
                raise NameError('Observation Asked does not exist==' +
                                str(obs_name))

        return observation

    def clamp_to_joint_limits(self):
        """
        clamps self.current_joint_pose based on the joint limits
        self._joint_limits
        {
         "shp_max": shp_max,
         "shp_min": shp_min,
         ...
         }
        :return:
        """

        rospy.logdebug("Clamping current_joint_pose>>>" +
                       str(self.current_joint_pose))
        shp_joint_value = self.current_joint_pose[0]
        shl_joint_value = self.current_joint_pose[1]
        elb_joint_value = self.current_joint_pose[2]
        wr1_joint_value = self.current_joint_pose[3]
        wr2_joint_value = self.current_joint_pose[4]
        wr3_joint_value = self.current_joint_pose[5]

        self.current_joint_pose[0] = max(
            min(shp_joint_value, self._joint_limits["shp_max"]),
            self._joint_limits["shp_min"])
        self.current_joint_pose[1] = max(
            min(shl_joint_value, self._joint_limits["shl_max"]),
            self._joint_limits["shl_min"])
        self.current_joint_pose[2] = max(
            min(elb_joint_value, self._joint_limits["elb_max"]),
            self._joint_limits["elb_min"])
        self.current_joint_pose[3] = max(
            min(wr1_joint_value, self._joint_limits["wr1_max"]),
            self._joint_limits["wr1_min"])
        self.current_joint_pose[4] = max(
            min(wr2_joint_value, self._joint_limits["wr2_max"]),
            self._joint_limits["wr2_min"])
        self.current_joint_pose[5] = max(
            min(wr3_joint_value, self._joint_limits["wr3_max"]),
            self._joint_limits["wr3_min"])

        rospy.logdebug("DONE Clamping current_joint_pose>>>" +
                       str(self.current_joint_pose))

    # Resets the state of the environment and returns an initial observation.
    def reset(self):
        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self._gz_conn.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self._gz_conn.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self._gz_conn.change_gravity(0.0, 0.0, 0.0)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_ur_joint_controllers...")
        self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose init variable...>>>" +
                       str(self.init_joint_pose))
        # We save that position as the current joint desired position
        init_pos = self.init_joints_pose(self.init_joint_pose)
        print("init_pos")

        # 4th: We Set the init pose to the jump topic so that the jump control can update
        # We check the jump publisher has connection

        if self._ctrl_type == 'traj_vel':
            self._joint_traj_pubisher.check_publishers_connection()
        elif self._ctrl_type == 'vel':
            self._joint_pubisher.check_publishers_connection()
        else:
            rospy.logwarn("Controller type is wrong!!!!")
        print("check_publishers_connection ")

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.check_all_systems_ready()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self._gz_conn.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self._gz_conn.pauseSim()
        # self._init_obj_pose()

        # 8th: Get the State Discrete Stringuified version of the observations
        rospy.logdebug("get_observations...")
        observation = self.get_observations()

        print('Reset final')
        return observation

    def _act(self, action):
        if self._ctrl_type == 'traj_vel':
            self.pre_ctrl_type = 'traj_vel'
            self._joint_traj_pubisher.move_joints(action)
        elif self._ctrl_type == 'vel':
            self.pre_ctrl_type = 'vel'
            self._joint_pubisher.move_joints(action)
        else:
            self._joint_pubisher.move_joints(action)

    def step(self, action):
        rospy.logdebug("UR step func")

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # Act
        self._gz_conn.unpauseSim()
        self._act(action)

        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self._gz_conn.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward = self.compute_dist_rewards()
        done = self.check_done()

        return observation, reward, done, {}

    def compute_dist_rewards(self):
        return 0

    def check_done(self):
        return True
예제 #16
0
class RobomasterEnv:
    def __init__(self):
        #Set up publishers for motion
        self.pub_cmd_vel = [rospy.Publisher('roborts_{0}/cmd_vel_acc'.format(i + 1), TwistAccel, queue_size=1) for i in range(4)]
        self.cmd_vel = [TwistAccel() for i in range(4)]
        self.pub_gimbal_angle = [rospy.Publisher('roborts_{0}/cmd_gimbal_angle'.format(i + 1), GimbalAngle, queue_size=1) for i in range(4)]
        self.cmd_gimbal = [GimbalAngle() for i in range(4)]

        #Set up state parameters
        self._odom_info = [[None]] * 4 
        self._gimbal_angle_info = [[None]] * 4
        self._robot_hp = [[2000], [2000], [2000], [2000]]
        self._num_projectiles = [[50], [50], [50], [50]]
        self._barrel_heat = [[0], [0], [0], [0]]
        self._launch_velocity_max = 25
        self._shoot = [0, 0, 0, 0]

        #Robot constants
        def diag_stats_helper(l, w):
            angle = math.atan(w / l)
            return angle, w / math.sin(angle) / 2

        self.robot_coords = [None] * 4
        self.robot_plates_coords = [None] * 4

        #Robot parameters
        self.robot_length, self.robot_width = 0.550, 0.420
        self.robot_diag_angle, self.robot_diag_length = diag_stats_helper(self.robot_length, self.robot_width)

        self.armor_size = 0.131
        self.armor_thickness = 0.015
        self.forward_frame_length = self.robot_length + self.armor_thickness * 2
        self.forward_frame_diag_angle, self.forward_frame_diag_length = diag_stats_helper(self.forward_frame_length, self.armor_size)
        
        self.sideway_frame_width = self.robot_width + self.armor_thickness * 2
        self.sideway_frame_diag_angle, self.sideway_frame_diag_length = diag_stats_helper(self.armor_size, self.sideway_frame_width)

        self.gimbal_angle_range = 82.5 / 180 * math.pi

        # more on this later
        self.gimbal_shoot_range = float('inf')
        #Set up subscribers for state
        self.sub_odom = [rospy.Subscriber('roborts_{0}/ground_truth/state'.format(i+1), Odometry, self.odometry_callback) for i in range(4)]
        self.sub_gimbal_angle = [rospy.Subscriber('roborts_{0}/joint_states'.format(i+1), JointState, self.gimbal_angle_callback) for i in range(4)]

        #Amount of time running per timestep
        self._running_step = 0.1

        #Max number of timesteps
        self._timestep = 0
        self._max_timesteps = 1000 

        #Conversion from timestep to real time
        self._real_time_conversion = 1

        #Gives coordinates for the obstacles:
        #x-start, y-start, x-end, y-end
        self.parallel_obstacles = [
            (1.500, 0, 1.750, 1.000),
            (3.600, 1.000, 4.600, 1.250),
            (7.100, 1.000, 8.100, 1.250),
            (1.500, 2.425, 2.300, 2.675),
            (5.800, 2.425, 6.600, 2.675),
            (0.000, 3.850, 1.000, 4.100),
            (3.500, 3.850, 4.500, 4.100),
            (6.350, 4.100, 6.600, 5.100),
        ]
        #points [top, middle, end, left, middle, right]
        diagonal_len = .300 / math.sqrt(2)
        self.center_obstacle = (4.050 - diagonal_len, 4.050, 4.050 + diagonal_len, 2.550 - diagonal_len, 2.550, 2.550 + diagonal_len)
        
        # initialize segments of each obstacle for blocking calculation
        # buffer is added
        segments = []
        buffer = 0.015
        for xl, yb, xr, yt in self.parallel_obstacles:
            xr, xr, yb, yt = xr - buffer, xr + buffer, yb - buffer, yt + buffer
            segments.extend([(xl, yb, xl, yt), (xl, yb, xr, yb), (xr, yt, xr, yb), (xr, yt, xl, yt)])
        top, hmid, bot, left, vmid, right = self.center_obstacle
        segments.extend([(left - buffer, hmid, right + buffer, hmid), (vmid, bot - buffer, vmid, top + buffer)])
        self.segments = segments

        self.armor_plate_damage = [20, 40, 40, 60]

        self._zones = [
        [3.830, 4.370, 0.270, 0.750],
        [1.630, 2.170, 1.695, 2.175],
        [0.230, 0.770, 3.120, 3.600],
        [4.350, 4.830, 4.500, 5.040], 
        [5.330, 7.870, 1.500, 1.980],
        [5.930, 6.470, 2.925, 3.405]]
        self._zones_active = [False] * 6

        #Zone Types:
        #0: refill
        #1: hp recovery
        #2: no movement debuff
        #3: no shoot debuff
        self._zone_types = [0, 3, 2]

        #Effects per robot
        self._robot_effects = [dict(), dict(), dict(), dict()]

        #Set up gazebo connection
        self.gazebo = GazeboConnection()
        self.gazebo.pauseSim()

    def quaternion_to_euler(self, x, y, z, w):
        t0 = +2.0 * (w * x + y * z)
        t1 = +1.0 - 2.0 * (x * x + y * y)
        X = math.atan2(t0, t1)

        t2 = +2.0 * (w * y - z * x)
        t2 = +1.0 if t2 > +1.0 else t2
        t2 = -1.0 if t2 < -1.0 else t2
        Y = math.asin(t2)

        t3 = +2.0 * (w * z + x * y)
        t4 = +1.0 - 2.0 * (y * y + z * z)
        Z = math.atan2(t3, t4)

        return X, Y, Z 

    def check_collision_with_obstacle(robot1, robot2):
        pass

    def robot_coords_from_odom(self, odom_info):
        x, y, z, yaw = odom_info
        coords = []
        for angle in (yaw + self.robot_diag_angle, yaw + math.pi - self.robot_diag_angle, yaw + math.pi + self.robot_diag_angle, yaw - self.robot_diag_angle):
            xoff, yoff = math.cos(angle) * self.robot_diag_length, math.sin(angle) * self.robot_diag_length
            coords.extend([x + xoff, y + yoff])
        return coords

    def get_enemy_team(self, robot_index):
        if robot_index < 2:
            return (2, 3)
        return (0, 1)

    def plate_coords_from_odom(self, odom_info):
        x, y, z, yaw = odom_info
        frontx1off, fronty1off = math.cos(yaw + self.forward_frame_diag_angle) * self.forward_frame_diag_length, math.sin(yaw + self.forward_frame_diag_angle) * self.forward_frame_diag_length
        frontx2off, fronty2off = math.cos(yaw - self.forward_frame_diag_angle) * self.forward_frame_diag_length, math.sin(yaw - self.forward_frame_diag_angle) * self.forward_frame_diag_length
        backx1off, backy1off = math.cos(yaw + math.pi + self.forward_frame_diag_angle) * self.forward_frame_diag_length, math.sin(yaw + math.pi + self.forward_frame_diag_angle) * self.forward_frame_diag_length
        backx2off, backy2off = math.cos(yaw + math.pi - self.forward_frame_diag_angle) * self.forward_frame_diag_length, math.sin(yaw + math.pi - self.forward_frame_diag_angle) * self.forward_frame_diag_length
        leftx1off, lefty1off = math.cos(yaw + self.sideway_frame_diag_angle) * self.sideway_frame_diag_length, math.sin(yaw + math.pi - self.sideway_frame_diag_angle) * self.sideway_frame_diag_length
        rightx2off, righty2off  = math.cos(yaw - self.sideway_frame_diag_angle) * self.sideway_frame_diag_length, math.sin(yaw - self.sideway_frame_diag_angle) * self.sideway_frame_diag_length
        rightx1off, righty1off = math.cos(yaw + math.pi + self.sideway_frame_diag_angle) * self.sideway_frame_diag_length, math.sin(yaw + math.pi + self.sideway_frame_diag_angle) * self.sideway_frame_diag_length
        leftx2off, lefty2off = math.cos(yaw + math.pi - self.sideway_frame_diag_angle) * self.sideway_frame_diag_length, math.sin(yaw + math.pi - self.sideway_frame_diag_angle) * self.sideway_frame_diag_length
        return [(x + frontx1off, y + fronty1off, x + frontx2off, y + fronty2off),
                (x + leftx1off, y + lefty1off, x + leftx2off, y + lefty2off),
                (x + rightx1off, y + righty1off, x + rightx2off, y + righty2off),
                (x + backx1off, y + backy1off, x + backx2off, y + backy2off)]

    def update_robot_coords(self, _id):
        self.robot_coords[_id] = self.robot_coords_from_odom(self._odom_info[_id])
        self.robot_plates_coords[_id] = self.plate_coords_from_odom(self._odom_info[_id])

    # pass in from_robot_index so it doesn't check for the robot itself!
    # otherwise, each robot blocks its own line-of-sight
    def is_line_of_sight_blocked(self, x1, y1, x2, y2, from_robot_index=None):
        # uninitiated
        if not self.robot_coords:
            return True
        for robot_index in range(4):
            if robot_index != from_robot_index:
                _x1, _y1, _x2, _y2, _x3, _y3, _x4, _y4 = self.robot_coords[robot_index]
                if lines_cross(x1, y1, x2, y2, _x1, _y1, _x3, _y3) or lines_cross(x1, y1, x2, y2, _x2, _y2, _x4, _y4):
                    return True
        for xl, yl, xh, yh in self.segments:
            if lines_cross(x1, y1, x2, y2, xl, yl, xh, yh):
                return True
        return False

    def get_visible_enemy_plates(self, robot_index):
        enemy1, enemy2 = self.get_enemy_team(robot_index)
        visible = []
        x, y, z, yaw = self._odom_info[robot_index]
        for index in range(4):
            x1, y1, x2, y2 = self.robot_plates_coords[enemy1][index]
            if distance(x, y, x1, y1) < self.gimbal_shoot_range and distance(x, y, x2, y2) < self.gimbal_shoot_range and angleDiff(angleTo(x, y, x1, y1), yaw) <= self.gimbal_angle_range and angleDiff(angleTo(x, y, x2, y2), yaw) <= self.gimbal_angle_range:
                if not self.is_line_of_sight_blocked(x, y, x1, y1, robot_index) and not self.is_line_of_sight_blocked(x, y, x2, y2, robot_index):
                    visible.extend([(enemy1, self.armor_plate_damage[index], angleBetween(x, y, x1, y1, x2, y2))])
        for index in range(4):
            x1, y1, x2, y2 = self.robot_plates_coords[enemy2][index]
            if distance(x, y, x1, y1) < self.gimbal_shoot_range and distance(x, y, x2, y2) < self.gimbal_shoot_range and angleDiff(angleTo(x, y, x1, y1), yaw) <= self.gimbal_angle_range and angleDiff(angleTo(x, y, x2, y2), yaw) <= self.gimbal_angle_range:
                if not self.is_line_of_sight_blocked(x, y, x1, y1, robot_index) and not self.is_line_of_sight_blocked(x, y, x2, y2, robot_index):
                    visible.extend([(enemy2, self.armor_plate_damage[index], angleBetween(x, y, x1, y1, x2, y2))])
        return visible

    def odometry_callback(self, msg):
        _id = int(msg._connection_header['topic'][9]) - 1
        self._odom_info[_id] = [msg.pose.pose.position.x, msg.pose.pose.position.y, msg.pose.pose.position.z, self.quaternion_to_euler(msg.pose.pose.orientation.x, msg.pose.pose.orientation.y, msg.pose.pose.orientation.z, msg.pose.pose.orientation.w)[2]]
        self.update_robot_coords(_id)

    def gimbal_angle_callback(self, msg):
        self._gimbal_angle_info[int(msg._connection_header['topic'][9]) - 1] = msg.position 
        return

    def _timestep_to_real_time(self):
        #converts timestep to real time
        return self._timestep * self._running_step * self._real_time_conversion 

    def _check_in_zone(self, position):
        #Input: x y z position
        #Checks if robot are in a buff/debuff zone, returns the zone
        for i in range(len(self._zones)):
            edge_buffer = 0.05 if self._zone_types[i%3] == 0 or self._zone_types[i%3] == 1 else -0.05
            if position[0] >= self._zones[i][0] + edge_buffer and position[0] <= self._zones[i][1] - edge_buffer and position[1] >= self._zones[i][2] + edge_buffer and position[1] <= self._zones[i][3] - edge_buffer:
                return i
        return None

    def step(self, action1, action2): 
        #x velocity, y velocity, twist angular, shoot

        if len(action1) != 8 or len(action2) != 8:
            print("Invalid action!")
            return None, None, None, {} 

        #Check for/update debuffs to robots if in zone
        for position in range(len(self._odom_info)):
            if self._odom_info[position][0] is not None:
                zone = self._check_in_zone(self._odom_info[position])
                if zone is None:
                    continue
                if not self._zones_active[zone]:
                     self._zones_active[zone] = True
                else:
                    continue
                if position == 0:
                    print(zone)
                    #print(zone, self._odom_info[position])
                if zone is None:
                    continue
                elif self._zone_types[zone%3] == 0 or self._zone_types[zone%3] == 1:
                    if zone < 3:
                        self._robot_effects[0][self._zone_types[zone%3]] = True
                        self._robot_effects[1][self._zone_types[zone%3]] = True
                    else:
                        self._robot_effects[2][self._zone_types[zone%3]] = True
                        self._robot_effects[3][self._zone_types[zone%3]] = True
                else:
                    self._robot_effects[position][self._zone_types[zone%3]] = 10
         
        #Apply buffs/debuffs
        for robot_number in range(len(self._robot_effects)):
            if self._robot_effects[robot_number].get(0, False):
                self._num_projectiles[robot_number] += 100
                self._robot_effects[robot_number][0] = False
            if self._robot_effects[robot_number].get(1, False):
                self._robot_hp[robot_number] = min(2000, self.robot_hp[robot_number] + 200)
                self._robot_effects[robot_number][1] = False
            if  self._robot_effects[robot_number].get(2, 0) > 0:
                if robot_number == 0:
                    action1[0] = 0
                    action1[1] = 0
                    action1[2] = 0
                elif robot_number == 1:
                    action1[4] = 0
                    action1[5] = 0
                    action1[6] = 0 
                elif robot_number == 2:
                    action2[0] = 0
                    action2[1] = 0
                    action2[2] = 0
                else:
                    action2[4] = 0
                    action2[5] = 0
                    action2[6] = 0
                self._robot_effects[robot_number][2] -= self._running_step
            if self._robot_effects[robot_number].get(3, 0) > 0:
                if robot_number == 0:
                    action1[3] = 0
                elif robot_number == 1:
                    action1[7] = 0
                elif robot_number == 2:
                    action2[3] = 0
                elif robot_number == 3:
                    action2[7] = 0
                self._robot_effects[robot_number] -= self._running_step
 
        #Set action parameters for publisher
        self.cmd_vel[0].twist.linear.x = action1[0]
        self.cmd_vel[0].twist.linear.y = action1[1]
        self.cmd_vel[0].twist.angular.z = action1[2]
        self._shoot[0] = action1[3]
        self.cmd_vel[1].twist.linear.x = action1[4]
        self.cmd_vel[1].twist.linear.y = action1[5]
        self.cmd_vel[1].twist.angular.z = action1[6]
        self._shoot[1] = action1[7]

        self.cmd_vel[2].twist.linear.x = action2[0]
        self.cmd_vel[2].twist.linear.y = action2[1]
        self.cmd_vel[2].twist.angular.z = action2[2]
        self._shoot[2] = action2[3]
        self.cmd_vel[3].twist.linear.x = action2[4]
        self.cmd_vel[3].twist.linear.y = action2[5]
        self.cmd_vel[3].twist.angular.z = action2[6]
        self._shoot[3] = action2[7]

        #Publish actions and execute for running_step time
        for i in range(4):
            self.pub_cmd_vel[i].publish(self.cmd_vel[i])
            self.pub_gimbal_angle[i].publish(self.cmd_gimbal[i])

        #Unpause Simulation
        self.gazebo.unpauseSim()
    
        time.sleep(self._running_step) 
        
        #Pause simulation
        self.gazebo.pauseSim()

        #calculate state and reward
        state = self.get_state()
        reward = self.get_reward(state, action1, action2)
        done = self._timestep >= self._max_timesteps

        return state, reward, done, {}
       

    def reset(self):
        self._zones_active = [False] * 6
        self._zone_types = [0, 2, 3]
        self._robot_debuffs = [[], [], [], []]
        self._timestep = 0
        self.real_time = 0
        self._robot_hp = [[2000], [2000], [2000], [2000]]
        self._num_projectiles = [[50], [50], [50], [50]]
        self._barrel_heat = [[0], [0], [0], [0]]
        self.gazebo.pauseSim()
        self.gazebo.resetSim()

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        #self.controllers_object.reset_monoped_joint_controllers()

        self.gazebo.pauseSim()
        state = self.get_state()

        return state

    def get_state(self):
        #xyz position, robot angle, gimbal angle, number of projectiles, barrel heat of shooter, robot hp
        #only use barrel heat for your robots

        #Sizes of parameters
        #xyz: 3
        #robot angle: 1
        #gimbal_angle: 2
        #number_of_projectiles: 1
        #barrel heat: 1 
        #robot hp: 1

        robot_state = [list(self._odom_info[i]) + self._num_projectiles[i] + self._barrel_heat[i] + self._robot_hp[i] for i in range(4)]
        return [robot_state[0] + robot_state[1] + robot_state[2] + robot_state[3], robot_state[0] + robot_state[1] + robot_state[2] + robot_state[3]]

    def get_reward(self, state, action1, action2):
        return 0
예제 #17
0
class DroneController(gym.Env):

    # Initialize the commands
    def __init__(self):

        # initialise state
        # ground truth position
        self.x = 0
        self.y = 0
        self.z = 0
        # ground truth velocity
        self.x_dot = 0
        self.y_dot = 0
        self.z_dot = 0
        # ground truth quaternion
        self.imu_x = 0  # q0
        self.imu_y = 0  # q1
        self.imu_z = 0  # q2
        self.imu_w = 0  # q3
        # nav drone angle
        self.rot_x = 0  # roll
        self.rot_y = 0  # pitch
        self.rot_z = 0  # yaw
        # Optitrack Information
        self.x_real = 0
        self.y_real = 0
        self.z_real = 0

        self.dist = 0
        # Subscribe to the /ardrone/navdata topic, of message type navdata, and call self.ReceiveNavdata when a message is received
        self.subNavdata = rospy.Subscriber('/ardrone/navdata', Navdata,
                                           self.receive_navdata)
        self.subOdom = rospy.Subscriber('/ground_truth/state', Odometry,
                                        self.odom_callback)
        # rospy.Subscriber('/vrpn_client_node/Rigid_grant/pose', PoseStamped, self.optictrack_callback)

        # Allow the controller to publish to the /ardrone/takeoff, land and reset topics
        self.pubLand = rospy.Publisher('/ardrone/land', Empty, queue_size=0)
        self.pubTakeoff = rospy.Publisher('/ardrone/takeoff',
                                          Empty,
                                          queue_size=0)
        self.pubReset = rospy.Publisher('/ardrone/reset', Empty, queue_size=1)

        # Allow the controller to publish to the /cmd_vel topic and thus control the drone
        self.pubCommand = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # Put location into odometry
        self.location = Odometry()
        self.status = Navdata()
        self.loc_real = PoseStamped()

        # Gets parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.run_step = rospy.get_param("/run_step")
        self.target_x = rospy.get_param("/desired_pose/x")
        self.target_y = rospy.get_param("/desired_pose/y")
        self.target_z = rospy.get_param("/desired_pose/z")
        self.desired_pose = np.array(
            [self.target_x, self.target_y, self.target_z])
        # self.desired_pose = [0, 0, 1.5]
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")
        self.min_altitude = rospy.get_param("/min_altitude")
        self.x_max = rospy.get_param("/max_pose/max_x")
        self.y_max = rospy.get_param("/max_pose/max_y")

        self.on_place = 0
        self.count_on_place = 0

        # initialize action space
        # Forward,Left,Right,Up,Down
        # self.action_space = spaces.Discrete(8)
        self.action_space = spaces.Discrete(7)
        self.up_bound = np.array([np.inf, np.inf, np.inf, np.inf, 1])
        self.low_bound = np.array([-np.inf, -np.inf, -np.inf, -np.inf, 0])
        self.observation_space = spaces.Box(
            self.low_bound,
            self.up_bound)  # position[x,y,z], linear velocity[x,y,z]
        #self.observation_space = spaces.Box(-np.inf, np.inf, (9,))
        # Gazebo Connection
        self.gazebo = GazeboConnection()

        self._seed()
        # Land the drone if we are shutting down
        rospy.on_shutdown(self.send_land)

    # ---------------------- Basic Functions ---------------------- #
    # Receive the navigation data
    def receive_navdata(self, navdata):

        self.rot_x = navdata.rotX
        self.rot_y = navdata.rotY
        self.rot_z = navdata.rotZ

    # call back odo data to subscriber
    def odom_callback(self, data):

        self.x = data.pose.pose.position.x
        self.y = data.pose.pose.position.y
        self.z = data.pose.pose.position.z
        self.imu_x = data.pose.pose.orientation.x
        self.imu_y = data.pose.pose.orientation.y
        self.imu_z = data.pose.pose.orientation.z
        self.imu_w = data.pose.pose.orientation.w
        self.x_dot = data.twist.twist.linear.x
        self.y_dot = data.twist.twist.linear.y
        self.z_dot = data.twist.twist.linear.z

    # call back the position and orientation to subscriber
    def optictrack_callback(self, data):

        self.x_real = data.pose.position.x
        self.y_real = data.pose.position.y
        self.z_real = data.pose.position.z
        # self.imu_x_real = opt_data.pose.position.x
        # self.imu_y_real = opt_data.pose.position.y
        # self.imu_z_real = opt_data.pose.position.z
        # self.imu_w_real = opt_data.pose.position.w

    # take observation for pose and orientation
    # orientation is used for calculate euler angle
    def take_observation(self):
        # state = [self.x, self.y, self.z, self.x_dot, self.y_dot, self.z_dot, self.rot_x, self.rot_y, self.rot_z]
        state = [self.x, self.y, self.z]
        return state

    # Takeoff
    def send_takeoff(self):
        # Send a takeoff message to the ardrone driver
        self.reset_action()
        time.sleep(0.5)
        self.pubTakeoff.publish(Empty())

    # Send Land Message
    def send_land(self):
        # Send a landing message to the ardrone driver
        # Note we send this in all states, landing can do no harm
        self.reset_action()
        time.sleep(0.5)
        self.pubLand.publish(Empty())

    # Send emergency message to drone
    def send_emergency(self):
        # Send an emergency (or reset) message to the ardrone driver
        self.pubReset.publish(Empty())

    # Take Action
    def take_action(self, roll, pitch, z_velocity, yaw_velocity=0):

        # Called by the main program to set the current command
        command = Twist()
        command.linear.x = pitch  # go y direction / green axis
        command.linear.y = roll  # go x direction / red axis
        command.linear.z = z_velocity  # go z direction / blue axis
        command.angular.x = 0
        command.angular.y = 0
        command.angular.z = yaw_velocity  # Spinning anti-clockwise

        self.pubCommand.publish(command)

    # ---------------------- Initialize Simulation ---------------------- #
    # reset command action and takeoff the drone
    def takeoff_sequence(self, seconds_taking_off=2):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_action()
        # wait for 1 seconds
        time.sleep(1)
        rospy.loginfo("Taking-Off Start")
        self.send_takeoff()
        time.sleep(seconds_taking_off)
        self.reset_action()
        rospy.loginfo("Taking-Off sequence completed")

    # Check if any topic is published
    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz
        while self.pubTakeoff.get_num_connections() == 0:
            rospy.loginfo(
                "No susbribers to Takeoff yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Takeoff Publisher Connected")

        while self.pubCommand.get_num_connections() == 0:
            rospy.loginfo(
                "No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    # Reset the action
    def reset_action(self):
        self.take_action(0, 0, 0)

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Reset the simulation
    def _reset(self):
        self.gazebo.unpauseSim()
        # reset action
        self.reset_action()

        self.on_place = 0
        self.count_on_place = 0

        self.send_land()
        time.sleep(0.6)

        time.sleep(1)
        # reset the pose on the simulation
        self.gazebo.resetSim()
        #self.gazebo.set_location()

        # check any topic published
        self.check_topic_publishers_connection()

        # takeoff the drone
        self.takeoff_sequence()

        # take observation
        state = self.take_observation()

        init_pose = [state[0], state[1], state[2]]
        self.dist = self.calculate_dist(init_pose)
        self.prev_pose = init_pose

        self.gazebo.pauseSim()

        state = np.concatenate(
            (state, [self.dist], [1. if self.on_place else 0.]))
        return state

    # ---------------------- Action Processing ---------------------- #
    # take an action from action space [0,5]
    def _step(self, action):

        self.gazebo.unpauseSim()

        if action == 0:  # FORWARD
            self.take_action(0, self.speed_value, 0)
        elif action == 1:  # BACKWARD
            self.take_action(0, -self.speed_value, 0)
        elif action == 2:  # LEFT
            self.take_action(self.speed_value, 0, 0)
        elif action == 3:  # RIGHT
            self.take_action(-self.speed_value, 0, 0)
        elif action == 4:  # Up
            self.take_action(0, 0, self.speed_value)
        elif action == 5:  # Down
            self.take_action(0, 0, -self.speed_value)
        elif action == 6:  # Stay
            self.take_action(0, 0, 0, 0)

        time.sleep(self.run_step)

        state = self.take_observation()
        self.gazebo.pauseSim()

        data_pose = np.array([state[0], state[1], state[2]])
        data_imu = [self.imu_x, self.imu_y, self.imu_z, self.imu_w]
        reward, done, self.dist = self.process_data(data_pose, data_imu)

        self.prev_pose = data_pose
        state = np.concatenate(
            (state, [self.dist], [1. if self.on_place else 0.]))
        return state, reward, done, {}

    # calculate the distance between two location
    def calculate_dist(self, data_pose):
        # err = np.subtract(data_pose, self.desired_pose)
        # w = np.array([1, 1, 4])
        # err = np.multiply(w, err)
        # dist = np.linalg.norm(err)
        x_dist = data_pose[0] - self.desired_pose[0]
        y_dist = data_pose[1] - self.desired_pose[1]
        z_dist = data_pose[2] - self.desired_pose[2]
        self.dist = np.sqrt(x_dist**2 + y_dist**2 + z_dist**2)
        return self.dist

    def get_reward(self, data_pose):
        reward = 0
        reach_goal = False

        self.dist = self.calculate_dist(data_pose)
        rospy.loginfo(str(self.dist))
        reward -= 0.03 * self.dist
        # current_pose = [data_pose[0], data_pose[1], data_pose[2]]
        if self.dist < 0.45:
            reward += 1
            self.on_place += 1
            self.count_on_place += 1
            if self.on_place > 100:
                reach_goal = True
        else:
            self.on_place = 0

        return reward, reach_goal, self.dist

    def process_data(self, data_pose, data_imu):
        done = False

        euler = tf.transformations.euler_from_quaternion(
            [data_imu[0], data_imu[1], data_imu[2], data_imu[3]])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        pitch_bad = not (-self.max_incl < pitch < self.max_incl)
        roll_bad = not (-self.max_incl < roll < self.max_incl)
        altitude_bad = not (self.min_altitude < data_pose[2] <
                            self.max_altitude)
        x_bad = not (-self.x_max < data_pose[0] < self.x_max)
        y_bad = not (-self.y_max < data_pose[1] < self.y_max)

        if altitude_bad or pitch_bad or roll_bad or x_bad or y_bad:
            rospy.loginfo("(Drone flight status is wrong) >> " + "[" +
                          str(altitude_bad) + "," + str(pitch_bad) + "," +
                          str(roll_bad) + "," + str(x_bad) + "," + str(y_bad) +
                          "]")
            done = True
            reward = -5
        else:
            reward, reach_goal, self.dist = self.get_reward(data_pose)
            if reach_goal:
                done = True

        return reward, done, self.dist
class URSimEnv(robot_gazebo_env_goal.RobotGazeboEnv):
    def __init__(self):
        # We assume that a ROS node has already been created before initialising the environment

        # Gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.joint_increment_value = rospy.get_param("/joint_increment_value")
        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")
        self.desired_force = rospy.get_param("/desired_force")
        self.desired_yaw = rospy.get_param("/desired_yaw")

        self.list_of_observations = rospy.get_param("/list_of_observations")

        haa_max = rospy.get_param("/joint_limits_array/haa_max")
        haa_min = rospy.get_param("/joint_limits_array/haa_min")
        hfe_max = rospy.get_param("/joint_limits_array/hfe_max")
        hfe_min = rospy.get_param("/joint_limits_array/hfe_min")
        kfe_max = rospy.get_param("/joint_limits_array/kfe_max")
        kfe_min = rospy.get_param("/joint_limits_array/kfe_min")
        self.joint_limits = {
            "haa_max": haa_max,
            "haa_min": haa_min,
            "hfe_max": hfe_max,
            "hfe_min": hfe_min,
            "kfe_max": kfe_max,
            "kfe_min": kfe_min
        }

        self.discrete_division = rospy.get_param("/discrete_division")

        self.maximum_base_linear_acceleration = rospy.get_param(
            "/maximum_base_linear_acceleration")
        self.maximum_base_angular_velocity = rospy.get_param(
            "/maximum_base_angular_velocity")
        self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.weight_r4 = rospy.get_param("/weight_r4")
        self.weight_r5 = rospy.get_param("/weight_r5")

        haa_init_value = rospy.get_param("/init_joint_pose/haa")
        hfe_init_value = rospy.get_param("/init_joint_pose/hfe")
        kfe_init_value = rospy.get_param("/init_joint_pose/kfe")
        self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value]

        # Fill in the Done Episode Criteria list
        self.episode_done_criteria = rospy.get_param("/episode_done_criteria")

        # Jump Increment Value in Radians
        self.jump_increment = rospy.get_param("/jump_increment")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(namespace="ur")

        self.ur_state_object = URState(
            max_height=self.max_height,
            min_height=self.min_height,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            joint_increment_value=self.joint_increment_value,
            list_of_observations=self.list_of_observations,
            joint_limits=self.joint_limits,
            episode_done_criteria=self.episode_done_criteria,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward,
            desired_force=self.desired_force,
            desired_yaw=self.desired_yaw,
            weight_r1=self.weight_r1,
            weight_r2=self.weight_r2,
            weight_r3=self.weight_r3,
            weight_r4=self.weight_r4,
            weight_r5=self.weight_r5,
            discrete_division=self.discrete_division,
            maximum_base_linear_acceleration=self.
            maximum_base_linear_acceleration,
            maximum_base_angular_velocity=self.maximum_base_angular_velocity,
            maximum_joint_effort=self.maximum_joint_effort,
            jump_increment=self.jump_increment)

        self.ur_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z)

        self.ur_joint_pubisher_object = JointPub()
        """
        For this version, we consider 5 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5) Dont Move
        6) Perform One Jump
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_ur_joint_controllers...")
        self.controllers_object.reset_ur_joint_controllers()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose init variable...>>>" +
                       str(self.init_joint_pose))
        # We save that position as the current joint desired position
        init_pos = self.ur_state_object.init_joints_pose(self.init_joint_pose)

        # 4th: We Set the init pose to the jump topic so that the jump control can update
        rospy.logdebug("Publish init_pose for Jump Control...>>>" +
                       str(init_pos))
        # We check the jump publisher has connection
        self.ur_joint_pubisher_object.check_publishers_connection()
        # We move the joints to position, no jump
        do_jump = False
        self.ur_joint_pubisher_object.move_joints_jump(init_pos, do_jump)

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.ur_state_object.check_all_systems_ready()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # 8th: Get the State Discrete Stringuified version of the observations
        rospy.logdebug("get_observations...")
        observation = self.ur_state_object.get_observations()
        state = self.get_state(observation)

        return state

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponds to which joint is incremented
        next_action_position, do_jump = self.ur_state_object.get_action_to_position(
            action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        self.ur_joint_pubisher_object.move_joints_jump(next_action_position,
                                                       do_jump)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.ur_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.ur_state_object.process_data()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state, reward, done, {}

    def get_state(self, observation):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        return self.ur_state_object.get_state_as_string(observation)
예제 #19
0
class QuadrotorEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.position_pub = rospy.Publisher('/drone/command/pose',
                                            PoseStamped,
                                            queue_size=100)

        # gets training parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.action_space = spaces.Discrete(
            5)  #Forward,Backward,Left,Right,Still
        self.reward_range = (-np.inf, np.inf)

        self.seed()

    # A function to initialize the random generator
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.init_desired_pose()
        self.current_position = Pose()
        self.current_command = PoseStamped()
        self.current_command.pose.position.x = 0.2
        self.current_command.pose.position.y = 0.2
        # 4th: takes an observation of the initial condition of the robot
        data_pose = self.take_observation()
        observation = [
            self.calculate_state_number(data_pose.position.x,
                                        data_pose.position.y)
        ]

        # 5th: pauses simulation
        self.gazebo.pauseSim()

        return observation

    def step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, we decide which velocity command corresponds
        #current_position = self.take_observation()
        position_cmd = PoseStamped()
        position_cmd.header.frame_id = "world"
        position_cmd.pose.position.z = 2.0
        """if action == 0: #FORWARD
            position_cmd.pose.position.x = self.current_position.position.x + 1; 
        elif action == 1: #BACKWARD
            position_cmd.pose.position.x = self.current_position.position.x - 1;
        elif action == 2: #RIGHT
            position_cmd.pose.position.y = self.current_position.position.y + 1;
        elif action == 3: #LEFT
            position_cmd.pose.position.y = self.current_position.position.y - 1;
        elif action == 4: #STILL
            position_cmd.pose.position.x = self.current_position.position.x; 
            position_cmd.pose.position.y = self.current_position.position.y;"""
        if action == 0:  #FORWARD
            position_cmd.pose.position.x = self.current_command.pose.position.x + 1
            position_cmd.pose.position.y = self.current_command.pose.position.y
        elif action == 1:  #BACKWARD
            position_cmd.pose.position.x = self.current_command.pose.position.x - 1
            position_cmd.pose.position.y = self.current_command.pose.position.y
        elif action == 2:  #RIGHT
            position_cmd.pose.position.x = self.current_command.pose.position.x
            position_cmd.pose.position.y = self.current_command.pose.position.y + 1
        elif action == 3:  #LEFT
            position_cmd.pose.position.x = self.current_command.pose.position.x
            position_cmd.pose.position.y = self.current_command.pose.position.y - 1
        elif action == 4:  #STILL
            position_cmd.pose.position.x = self.current_command.pose.position.x
            position_cmd.pose.position.y = self.current_command.pose.position.y
        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.gazebo.unpauseSim()
        self.position_pub.publish(position_cmd)
        #time.sleep(self.running_step) # time per step
        data_pose = self.take_observation()
        self.current_position = data_pose

        self.current_command = position_cmd

        # Wait until the drone actually got the command position
        rate = rospy.Rate(100)
        while not self.calculate_state_number(
                self.current_position.position.x, self.current_position.
                position.y) == self.calculate_state_number(
                    position_cmd.pose.position.x,
                    position_cmd.pose.position.y):
            self.position_pub.publish(position_cmd)
            data_pose = self.take_observation()
            self.current_position = data_pose
            rate.sleep()

        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(data_pose, self.desired_pose)

        state = [
            self.calculate_state_number(data_pose.position.x,
                                        data_pose.position.y)
        ]
        return state, reward, done, {}

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose_raw = rospy.wait_for_message(
                    '/drone/ground_truth_to_tf/pose', PoseStamped, timeout=100)
                data_pose = data_pose_raw.pose
            except:
                rospy.loginfo(
                    "Current drone pose not ready yet, retrying for getting robot pose"
                )

        return data_pose

    def calculate_dist_between_two_Points(self, p_init, p_end):
        a = np.array((p_init.x, p_init.y))
        b = np.array((p_end.x, p_end.y))

        dist = np.linalg.norm(a - b)

        return dist

    def init_desired_pose(self):

        current_init_pose = self.take_observation()

        self.best_dist = self.calculate_dist_between_two_Points(
            current_init_pose.position, self.desired_pose.position)

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz

        while (self.position_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No susbribers to Cmd_position yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_position Publisher Connected")

    def reset_cmd_position_commands(self):
        # We send an empty null Twist
        position_cmd = PoseStamped()
        position_cmd.pose.position.x = 0
        position_cmd.pose.position.y = 0
        position_cmd.pose.position.z = 0
        self.position_pub.publish(position_cmd)

    def improved_distance_reward(self, current_pose, desired_pose):
        done = False
        current_state = self.calculate_state_number(current_pose.position.x,
                                                    current_pose.position.y)
        desired_state = self.calculate_state_number(desired_pose.position.x,
                                                    desired_pose.position.y)

        current_dist = self.calculate_dist_between_two_Points(
            current_pose.position, desired_pose.position)
        if current_dist < self.best_dist:
            reward = 100
            self.best_dist = current_dist
        elif current_dist == self.best_dist:
            reward = -10
        else:
            reward = -100

        if current_state == desired_state:
            reward += 1000
            done = True

        return reward, done

    def process_data(self, data_position, desired_pose):

        done = False
        x_out = data_position.position.x > 5 or data_position.position.x < -5
        y_out = data_position.position.y > 5 or data_position.position.y < -5

        if x_out or y_out:
            rospy.loginfo("(Drone flight position is out)")
            done = True
            reward = -200
        else:
            reward, done = self.improved_distance_reward(
                data_position, desired_pose)

        return reward, done

    def calculate_state_number(self, x, y):
        if x < -5:
            x = -4.99
        if x > 5:
            x = 4.99
        if y < -5:
            y = -4.99
        if y > 5:
            y = 4.99
        for i in range(10):
            if x >= -5 + i and x < -5 + i + 1:
                numberx = 10 * i
        for j in range(10):
            if y >= -5 + j and y < -5 + j + 1:
                numbery = j
        state_number = numberx + numbery
        state = state_number
        return state
예제 #20
0
class QuadCopterEnv(gym.Env):

    def __init__(self):
        
        # We assume that a ROS node has already been created
        # before initialising the environment
        
        self.vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=5)
        self.takeoff_pub = rospy.Publisher('/ardrone/takeoff', EmptyTopicMsg, queue_size=0)
        
        # gets training parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")
        self.max_distance = rospy.get_param("/max_distance")
        
        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        high = np.array([
            5000,
            5000,
            5000,
            5000,
            5000,
            5000,
            5000,
            5000,
            5000])
        self.action_space = spaces.Discrete(7) #Forward,Left,Right,Up,Down,Back,None
        self.observation_space = spaces.Box(-high, high)
        self.reward_range = (-np.inf, np.inf)

        self.seed()

    # A function to initialize the random generator
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def render(self, mode=None):
        None
    # Resets the state of the environment and returns an initial observation.
    def reset(self):
        
        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.init_desired_pose()
        self.takeoff_sequence()
        rotation = [0,0,0]
        # 4th: takes an observation of the initial condition of the robot
        data_pose, data_imu = self.take_observation()
        observation = [self.roundTo(data_pose.position.x,100), 
                 self.roundTo(data_pose.position.y,100), 
                 self.roundTo(data_pose.position.z,100), 

                 self.roundTo(self.desired_pose.position.x,100), 
                 self.roundTo(self.desired_pose.position.y,100), 
                 self.roundTo(self.desired_pose.position.z,100),

                 self.roundTo(rotation[0], 100),
                 self.roundTo(rotation[1], 100),
                 self.roundTo(rotation[2], 100)
                 ]
        # 5th: pauses simulation
        self.gazebo.pauseSim()

        with open("/root/catkin_ws/src/position.csv", "a") as myfile:
            myfile.write("reset\n")
        with open("/root/catkin_ws/src/distance.csv", "a") as myfile:
            myfile.write("reset\n")

        return observation

    def step(self, action):
        print("Action->" + str(action))

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot
        
        # 1st, we decide which velocity command corresponds
        vel_cmd = Twist()
        vel_cmd.linear.x = 0.0
        vel_cmd.linear.y = 0.0
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        if action == 0: #FORWARD
            vel_cmd.linear.x = self.speed_value
        elif action == 1: #LEFT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = self.speed_value
        elif action == 2: #RIGHT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = -self.speed_value
        elif action == 3: #Up
            vel_cmd.linear.z = self.speed_value
        elif action == 4: #Down
            vel_cmd.linear.z = -self.speed_value
        elif action == 5: #Back
            vel_cmd.linear.x = -self.speed_value

        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.gazebo.unpauseSim()
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step)
        data_pose, data_imu = self.take_observation()
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward,done,rotation = self.process_data(data_pose, data_imu)

        # Promote going forwards instead if turning
        if action == 0:
            reward *= 2
        elif action == 1 or action == 2:
            reward *= 0.75
        elif action == 3 or action == 4:
            reward *= 1.5
        elif action == 5:
            reward *= 2

        #state = [data_pose.position, self.desired_pose]
        state = [self.roundTo(data_pose.position.x,100), 
                 self.roundTo(data_pose.position.y,100), 
                 self.roundTo(data_pose.position.z,100), 

                 self.roundTo(self.desired_pose.position.x,100), 
                 self.roundTo(self.desired_pose.position.y,100), 
                 self.roundTo(self.desired_pose.position.z,100),

                 self.roundTo(rotation[0], 100),
                 self.roundTo(rotation[1], 100),
                 self.roundTo(rotation[2], 100)
                 ]
        #print "after step state"
        print state
        print reward
        with open("/root/catkin_ws/src/position.csv", "a") as myfile:
            myfile.write("{},{},{}\n".format(state[0], state[1], state[2] ))
        return state, reward, done, {}

    def roundTo(self,data,to):
        return int(round(data*to))

    def take_observation (self):
        data_pose = None
        while data_pose is None:
            try:
#                data_pose = rospy.wait_for_message('/drone/gt_pose', Pose, timeout=5)
                data_pose = rospy.wait_for_message('/ground_truth/state', Odometry, timeout=5)
                data_pose = data_pose.pose.pose
            except Exception as e:
                None
                #rospy.loginfo("Current drone pose not ready yet, retrying for getting robot pose {0}".format(e))

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/ardrone/imu', Imu, timeout=5)
            except:
                None
                #rospy.loginfo("Current drone imu not ready yet, retrying for getting robot imu")
        
        return data_pose, data_imu

    def calculate_dist_between_two_Points(self,p_init,p_end):
        a = np.array((p_init.x ,p_init.y, p_init.z))
        b = np.array((p_end.x ,p_end.y, p_end.z))
        
        dist = np.linalg.norm(a-b)
        
        return dist


    def init_desired_pose(self):
        current_init_pose, imu = self.take_observation()
        self.best_dist = self.calculate_dist_between_two_Points(current_init_pose.position, self.desired_pose.position)

    def check_topic_publishers_connection(self):
        
        rate = rospy.Rate(10) # 10hz
        while(self.takeoff_pub.get_num_connections() == 0):
            rospy.loginfo("No susbribers to Takeoff yet so we wait and try again")
            rate.sleep();
        rospy.loginfo("Takeoff Publisher Connected")

        while(self.vel_pub.get_num_connections() == 0):
            rospy.loginfo("No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep();
        rospy.loginfo("Cmd_vel Publisher Connected")
        

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)


    def takeoff_sequence(self, seconds_taking_off=1):
        rospy.loginfo("Takeoff sequence starting")
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_cmd_vel_commands()
        rospy.loginfo("Takeoff reset cmd_vel")
        
        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo( "Taking-Off Start")
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        rospy.loginfo( "Taking-Off sequence completed")
        

    def improved_distance_reward(self, current_pose):
        current_dist = self.calculate_dist_between_two_Points(current_pose.position, self.desired_pose.position)
        #rospy.loginfo("Calculated Distance = "+str(current_dist))
        
        if current_dist < self.best_dist:
            reward = 100
        elif current_dist == self.best_dist:
            reward = 0
        else:
            reward = -100
            #print "Made Distance bigger= "+str(self.best_dist)
        self.best_dist = current_dist
        
        return reward, current_dist
        
    def process_data(self, data_position, data_imu):

        done = False
        
        euler = tf.transformations.euler_from_quaternion([data_imu.orientation.x,data_imu.orientation.y,data_imu.orientation.z,data_imu.orientation.w])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        pitch_bad = not(-self.max_incl < pitch < self.max_incl)
        roll_bad = not(-self.max_incl < roll < self.max_incl)
        altitude_bad = data_position.position.z > self.max_altitude or data_position.position.z < 0.05

        if altitude_bad or pitch_bad or roll_bad:
            rospy.loginfo ("(Drone flight status is wrong) >>> ("+str(altitude_bad)+","+str(pitch_bad)+","+str(roll_bad)+")")
            done = True
            reward = -200
        else:
            reward, distance = self.improved_distance_reward(data_position)
            with open("/root/catkin_ws/src/distance.csv", "a") as myfile:
                myfile.write("{}\n".format(distance))

            if (distance > self.max_distance):
                done = True

        return reward,done,[roll, pitch, yaw]
def velocity_test():
    """
    Test of the time it takes to reach a certain speed
    """

    speed_value = 10.0
    wait_time = 0.1

    rospy.init_node('cartpole_speed_test_node',
                    anonymous=True,
                    log_level=rospy.WARN)

    # Controllers Info
    controllers_list = [
        'joint_state_controller',
        'pole_joint_velocity_controller',
        'foot_joint_velocity_controller',
    ]
    robot_name_space = "cartpole_v0"
    controllers_object = ControllersConnection(
        namespace=robot_name_space, controllers_list=controllers_list)

    debug_object = MoveCartClass()

    start_init_physics_parameters = True
    reset_world_or_sim = "SIMULATION"
    gazebo = GazeboConnection(start_init_physics_parameters,
                              reset_world_or_sim)

    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()
    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("START CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("SET init pose...")
    debug_object.set_init_pose()
    rospy.loginfo("WAIT FOR GOING TO INIT POSE")
    time.sleep(wait_time)

    raw_input("Start Movement...PRESS KEY")
    i = 0
    wait_times_m = []
    while not rospy.is_shutdown():
        vel_x = [speed_value]

        rospy.loginfo("Moving RIGHT...")
        debug_object.move_joints([speed_value])
        delta_time = debug_object.wait_until_base_is_in_vel([speed_value])
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        rospy.loginfo("Moving STOP...")
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_vel(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        raw_input("Start Movement...PRESS KEY")

        rospy.loginfo("Moving LEFT...")
        debug_object.move_joints([-1 * speed_value])
        delta_time = debug_object.wait_until_base_is_in_vel([-1 * speed_value])
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        rospy.loginfo("Moving STOP...")
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_vel(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        raw_input("Start Movement...PRESS KEY")

        i += 1

        if i > 10:
            average_time = numpy.average(wait_times_m)
            rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]")
            break
예제 #22
0
class Navigation_Env(gym.Env):
    def __init__(self):
        # self.sess = tf.InteractiveSession()
        # action publisher
        self.cmd_pub = rospy.Publisher("/mobile_base/commands/velocity",
                                       Twist,
                                       queue_size=2)

        self.running_step = 1 / 50.0

        self.num_agents = 4
        self.state_dim = state_dim
        self.action_dim = 2

        self.rate = rospy.Rate(40.0)

        high = np.array([1. for _ in range(self.action_dim)])
        low = np.array([0. for _ in range(self.action_dim)])
        self.action_space = spaces.Box(low=low, high=high)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        # state object
        self.state_object = NavigationState(self.state_dim)

        self.state_msg = ModelState()

        self._seed()

        print("end of init...")
        self.gazebo.unpauseSim()

    # A function to initialize the random generator

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the Simulator
        # rospy.logdebug("Pausing SIM...")
        # self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        # print "unpauseSim 1"
        self.gazebo.unpauseSim()

        rospy.logdebug("Reset SIM...")
        self.gazebo.resetSim()

        # random initial positions and orientations
        self.generate_random_init_pose()

        # Get the State Discrete Stringuified version of the observations
        while not self.state_object.new_msg_received:
            print "waiting for new images to arrive!"
            time.sleep(0.4)
        state = self.get_step_state()
        # print "got"
        # 7th: pauses simulation
        rospy.logdebug("Pausing SIM...")
        # print "pauseSim 1"
        self.gazebo.pauseSim()

        return state

    def _step(self, actions):
        # action to the gazebo environment
        self.gazebo.unpauseSim()

        # st = rospy.Time.now()

        action_fw = np.clip(actions, 0, 1)
        move_cmd = Twist()
        move_cmd.linear.x = action_fw[0]
        move_cmd.angular.z = action_fw[1]
        self.cmd_pub.publish(move_cmd)

        self.rate.sleep()
        time.sleep(0.025)

        self.gazebo.pauseSim()

        # end = rospy.Time.now()
        # print "time step: ", (end - st).to_sec()

        state = self.get_step_state()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.state_object.process_data()

        return state, reward, done

    def generate_random_init_pose(self):
        init_positions = {
            13: [-15.0, 11.0, 0.0],
            14: [-6.0, 11.0, 0.0],
            15: [5.0, 11.0, 0.0],
            16: [14.0, 11.0, 0.0],
            9: [-15.0, 5.0, 0.0],
            10: [-6.0, 5.0, 0.0],
            11: [5.0, 3.0, 0.0],
            12: [14.0, 3.0, 0.0],
            5: [-15.0, -3.0, 0.0],
            6: [-6.0, -3.0, 0.0],
            7: [5.0, -3.0, 0.0],
            8: [14.0, -3.0, 0.0],
            1: [-15, -10.5, 0.0],
            2: [-6.0, -3.0, 0.0],
            3: [5.0, -10.5, 0.0],
            4: [14.0, -10.5, 0.0],
        }
        init_eulers = {
            13: [[0.0, 0.0, 0.0], [0.0, 0.0, -1.57]],
            14: [[0.0, 0.0, 0.0], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            15: [[0.0, 0.0, 0.0], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            16: [[0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            9: [[0.0, 0.0, 1.57], [0.0, 0.0, 0.0], [0.0, 0.0, -1.57]],
            10: [[0.0, 0.0, 1.57], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            11: [[0.0, 0.0, 1.57], [0.0, 0.0, 0.0], [0.0, 0.0, -1.57]],
            12: [[0.0, 0.0, 1.57], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            5: [[0.0, 0.0, 1.57], [0.0, 0.0, 0.0], [0.0, 0.0, -1.57]],
            6: [[0.0, 0.0, 0.0], [0.0, 0.0, 1.57], [0.0, 0.0, 3.14],
                [0.0, 0.0, -1.57]],
            7: [[0.0, 0.0, 1.57], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            8: [[0.0, 0.0, 1.57], [0.0, 0.0, 3.14], [0.0, 0.0, -1.57]],
            1: [[0.0, 0.0, 1.57], [0.0, 0.0, 0.0]],
            2: [[0.0, 0.0, 0.0], [0.0, 0.0, 1.57], [0.0, 0.0, 3.14]],
            3: [[0.0, 0.0, 1.57], [0.0, 0.0, 0.0], [0.0, 0.0, 3.14]],
            4: [[0.0, 0.0, 1.57], [0.0, 0.0, 3.14]]
        }

        rand_index = random.randint(1, 16)
        print "randint: ", rand_index

        position = init_positions[rand_index]
        euler_list = init_eulers[rand_index]
        rand_index_euler = random.randint(0, len(euler_list) - 1)

        print "rand_index_euler: ", rand_index_euler
        euler = euler_list[rand_index_euler]
        print position
        print euler
        quat_ = transform.transformations.quaternion_from_euler(
            euler[0], euler[1], euler[2])
        self.state_msg.model_name = 'mobile_base'
        self.state_msg.pose.position.x = position[0]
        self.state_msg.pose.position.y = position[1]
        self.state_msg.pose.position.z = position[2]
        self.state_msg.pose.orientation.x = quat_[0]
        self.state_msg.pose.orientation.y = quat_[1]
        self.state_msg.pose.orientation.z = quat_[2]
        self.state_msg.pose.orientation.w = quat_[3]

        self.set_turtlebot_init_pose()

    def set_turtlebot_init_pose(self):
        rospy.wait_for_service('/gazebo/set_model_state')
        try:
            set_state = rospy.ServiceProxy('/gazebo/set_model_state',
                                           SetModelState)
            resp = set_state(self.state_msg)

        except rospy.ServiceException, e:
            print "Service call failed: %s" % e
예제 #23
0
class OldMouseEnv(gym.Env):

    def __init__(self):



        number_actions = rospy.get_param('/mouse/n_actions')
        self.action_space = spaces.Discrete(number_actions)

        self._seed()

        #get configuration parameters
        self.init_roll_vel = rospy.get_param('/mouse/init_roll_vel')

        # Actions
        self.roll_speed_fixed_value = rospy.get_param('/mouse/roll_speed_fixed_value')
        self.roll_speed_increment_value = rospy.get_param('/mouse/roll_speed_increment_value')

        self.start_point = Point()
        self.start_point.x = rospy.get_param("/mouse/init_mouse_pose/x")
        self.start_point.y = rospy.get_param("/mouse/init_mouse_pose/y")
        self.start_point.z = rospy.get_param("/mouse/init_mouse_pose/z")

        # Done
        self.max_pitch_angle = rospy.get_param('/mouse/max_pitch_angle')

        # Rewards
        self.move_distance_reward_weight = rospy.get_param("/mouse/move_distance_reward_weight")
        self.y_linear_speed_reward_weight = rospy.get_param("/mouse/y_linear_speed_reward_weight")
        self.y_axis_angle_reward_weight = rospy.get_param("/mouse/y_axis_angle_reward_weight")
        self.end_episode_points = rospy.get_param("/mouse/end_episode_points")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_list = ['joint_state_controller',
                         'right_front_diff_drive_controller'
                         ]
        self.controllers_object = ControllersConnection(namespace="mouse",
                                                        controllers_list=self.controllers_list)


        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()

        rospy.Subscriber("/mouse/joint_states", JointState, self.joints_callback)
        rospy.Subscriber("/mouse/odom", Odometry, self.odom_callback)


        self._roll_vel_pub = rospy.Publisher('/mouse/right_front_diff_drive_controller/command', Float64, queue_size=1)
        self.check_publishers_connection()

        self.gazebo.pauseSim()


    def _seed(self, seed=None): #overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):#overriden function

        self.gazebo.unpauseSim()
        self.set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self.compute_reward(obs, done)
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs, reward, done, info


    def reset(self):

        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.set_init_pose()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.gazebo.pauseSim()
        self.init_env_variables()
        obs = self._get_obs()
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs


    def init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset at the start
        of an episode.
        :return:
        """
        self.total_distance_moved = 0.0
        self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point)
        self.roll_turn_speed = rospy.get_param('/mouse/init_roll_vel')

    def _is_done(self, observations):

        pitch_angle = observations[3]

        if abs(pitch_angle) > self.max_pitch_angle:
            rospy.logerr("WRONG Mouse Pitch Orientation==>" + str(pitch_angle))
            done = True
        else:
            rospy.logdebug("Mouse Pitch Orientation Ok==>" + str(pitch_angle))
            done = False

        return done

    def set_action(self, action):

        # We convert the actions to speed movements to send to the parent class MouseSingleDiskEnv
        if action == 0:# Move Speed Wheel Forwards
            self.roll_turn_speed = self.roll_speed_fixed_value
        elif action == 1:# Move Speed Wheel Backwards
            self.roll_turn_speed = self.roll_speed_fixed_value
        elif action == 2:# Stop Speed Wheel
            self.roll_turn_speed = 0.0
        elif action == 3:# Increment Speed
            self.roll_turn_speed += self.roll_speed_increment_value
        elif action == 4:# Decrement Speed
            self.roll_turn_speed -= self.roll_speed_increment_value

        # We clamp Values to maximum
        rospy.logdebug("roll_turn_speed before clamp=="+str(self.roll_turn_speed))
        self.roll_turn_speed = numpy.clip(self.roll_turn_speed,
                                          -1*self.roll_speed_fixed_value,
                                          self.roll_speed_fixed_value)
        rospy.logdebug("roll_turn_speed after clamp==" + str(self.roll_turn_speed))

        # We tell the OneDiskMouse to spin the RollDisk at the selected speed
        self.move_joints(self.roll_turn_speed)


    def _get_obs(self):
        """
        Here we define what sensor data defines our robots observations
        To know which Variables we have acces to, we need to read the
        MouseSingleDiskEnv API DOCS
        :return:
        """

        # We get the orientation of the mouse in RPY
        roll, pitch, yaw = self.get_orientation_euler()

        # We get the distance from the origin
        y_distance = self.get_y_dir_distance_from_start_point(self.start_point)

        # We get the current speed of the Roll Disk
        current_disk_roll_vel = self.get_roll_velocity()

        # We get the linear speed in the y axis
        y_linear_speed = self.get_y_linear_speed()

        mouse_observations = [
            round(current_disk_roll_vel, 0),
            round(y_distance, 1),
            round(roll, 1),
            round(pitch, 1),
            round(y_linear_speed,1),
            round(yaw, 1),
        ]

        return mouse_observations


    def get_orientation_euler(self):
        # We convert from quaternions to euler
        orientation_list = [self.odom.pose.pose.orientation.x,
                            self.odom.pose.pose.orientation.y,
                            self.odom.pose.pose.orientation.z,
                            self.odom.pose.pose.orientation.w]

        roll, pitch, yaw = euler_from_quaternion(orientation_list)
        return roll, pitch, yaw

    def get_roll_velocity(self):
        # We get the current joint roll velocity
        roll_vel = self.joints.velocity[0]
        return roll_vel

    def get_y_linear_speed(self):
        # We get the current joint roll velocity
        y_linear_speed = self.odom.twist.twist.linear.y
        return y_linear_speed


    def get_y_dir_distance_from_start_point(self, start_point):
        """
        Calculates the distance from the given point and the current position
        given by odometry. In this case the increase or decrease in y.
        :param start_point:
        :return:
        """
        y_dist_dir = self.odom.pose.pose.position.y - start_point.y

        return y_dist_dir

    def compute_reward(self, observations, done):

        if not done:

            y_distance_now = observations[1]
            delta_distance = y_distance_now - self.current_y_distance
            rospy.logdebug("y_distance_now=" + str(y_distance_now)+", current_y_distance=" + str(self.current_y_distance))
            rospy.logdebug("delta_distance=" + str(delta_distance))
            reward_distance = delta_distance * self.move_distance_reward_weight
            self.current_y_distance = y_distance_now

            y_linear_speed = observations[4]
            rospy.logdebug("y_linear_speed=" + str(y_linear_speed))
            reward_y_axis_speed = y_linear_speed * self.y_linear_speed_reward_weight

            # Negative Reward for yaw different from zero.
            yaw_angle = observations[5]
            rospy.logdebug("yaw_angle=" + str(yaw_angle))
            # Worst yaw is 90 and 270 degrees, best 0 and 180. We use sin function for giving reward.
            sin_yaw_angle = math.sin(yaw_angle)
            rospy.logdebug("sin_yaw_angle=" + str(sin_yaw_angle))
            reward_y_axis_angle = -1 * abs(sin_yaw_angle) * self.y_axis_angle_reward_weight


            # We are not intereseted in decimals of the reward, doesnt give any advatage.
            reward = round(reward_distance, 0) + round(reward_y_axis_speed, 0) + round(reward_y_axis_angle, 0)
            rospy.logdebug("reward_distance=" + str(reward_distance))
            rospy.logdebug("reward_y_axis_speed=" + str(reward_y_axis_speed))
            rospy.logdebug("reward_y_axis_angle=" + str(reward_y_axis_angle))
            rospy.logdebug("reward=" + str(reward))
        else:
            reward = -1*self.end_episode_points

        return reward


    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data


    def check_all_sensors_ready(self):
        self.check_joint_states_ready()
        self.check_odom_ready()
        rospy.logdebug("ALL SENSORS READY")

    def check_joint_states_ready(self):
        self.joints = None
        while self.joints is None and not rospy.is_shutdown():
            try:
                self.joints = rospy.wait_for_message("/mouse/joint_states", JointState, timeout=1.0)
                rospy.logdebug("Current mouse/joint_states READY=>" + str(self.joints))

            except:
                rospy.logerr("Current mouse/joint_states not ready yet, retrying for getting joint_states")
        return self.joints

    def check_odom_ready(self):
        self.odom = None
        while self.odom is None and not rospy.is_shutdown():
            try:
                self.odom = rospy.wait_for_message("/mouse/odom", Odometry, timeout=1.0)
                rospy.logdebug("Current /mouse/odom READY=>" + str(self.odom))

            except:
                rospy.logerr("Current /mouse/odom not ready yet, retrying for getting odom")

        return self.odom

    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._roll_vel_pub.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.logdebug("No susbribers to _roll_vel_pub yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.logdebug("_base_pub Publisher Connected")

        rospy.logdebug("All Publishers READY")

    def move_joints(self, roll_speed):
        joint_speed_value = Float64()
        joint_speed_value.data = roll_speed
        rospy.logdebug("Single Disk Roll Velocity>>" + str(joint_speed_value))
        self._roll_vel_pub.publish(joint_speed_value)
        self.wait_until_roll_is_in_vel(joint_speed_value.data)

    def wait_until_roll_is_in_vel(self, velocity):

        rate = rospy.Rate(10)
        start_wait_time = rospy.get_rostime().to_sec()
        end_wait_time = 0.0
        epsilon = 0.1
        v_plus = velocity + epsilon
        v_minus = velocity - epsilon
        while not rospy.is_shutdown():
            joint_data = self.check_joint_states_ready()
            roll_vel = joint_data.velocity[0]
            rospy.logdebug("VEL=" + str(roll_vel) + ", ?RANGE=[" + str(v_minus) + ","+str(v_plus)+"]")
            are_close = (roll_vel <= v_plus) and (roll_vel > v_minus)
            if are_close:
                rospy.logdebug("Reached Velocity!")
                end_wait_time = rospy.get_rostime().to_sec()
                break
            rospy.logdebug("Not there yet, keep waiting...")
            rate.sleep()
        delta_time = end_wait_time- start_wait_time
        rospy.logdebug("[Wait Time=" + str(delta_time)+"]")
        return delta_time


    def set_init_pose(self):
        """Sets the Robot in its init pose
        """
        self.move_joints(self.init_roll_vel)

        return True


    def convert_obs_to_state(self,observations):
        """
        Converts the observations used for reward and so on to the essentials for the robot state
        In this case we only need the orientation of the mouse and the speed of the disc.
        The distance doesnt condition at all the actions
        """
        disk_roll_vel = observations[0]
        y_linear_speed = observations[4]
        yaw_angle = observations[5]

        state_converted = [disk_roll_vel, y_linear_speed, yaw_angle]

        return state_converted
예제 #24
0
class QuadCopterEnv(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=5)
        self.takeoff_pub = rospy.Publisher('/drone/takeoff',
                                           EmptyTopicMsg,
                                           queue_size=0)
        self.gimbal_pub = rospy.Publisher('/dji_sdk/gimbal',
                                          Gimbal,
                                          queue_size=5)

        # gets training parameters from param server
        '''
        self.speed_value = rospy.get_param("/speed_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")
        self.min_altitude = rospy.get_param("/min_altitude")
        '''
        self.speed_value = 1
        self.desired_pose = Pose()
        self.desired_pose.position.z = 0.7
        self.desired_pose.position.x = 14
        self.desired_pose.position.y = 1
        self.running_step = 0.05
        self.max_incl = 0.7
        self.max_altitude = 20
        self.min_altitude = 0.1
        self.max_x = 15
        self.max_y = 10
        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.action_space = 2
        self.observation_space = 6
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        self.shapestore = []
        self.shapestore.append(0)
        self.act = np.zeros(3)
        self.pr = 0
        self.pdlen = 0
        self.plotpd = pd.DataFrame({
            'p_x': [],
            'p_y': [],
            'v_x': [],
            'v_y': [],
            'time': []
        })
        self.tinit = time.time()

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        #self.init_desired_pose()
        self.takeoff_sequence()
        # 4th: takes an observation of the initial condition of the robot
        data_pose, data_vel, data_pr, data_bs, data_imu = self.take_observation(
        )
        state, reward, done = self.process_data(data_pose, data_vel, data_pr,
                                                data_bs, data_imu)

        # 5th: pauses simulation
        self.gazebo.pauseSim()
        self.pr = 0
        #self.plotpd = pd.DataFrame({'p_x' : [], 'p_y' : [], 'v_x' : [], 'v_y' : []})
        return state

    def _step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, we decide which velocity command corresponds
        self.gazebo.unpauseSim()
        data_pose_1, data_vel_1, data_pr_1, data_bs_1, data_imu_1 = self.take_observation(
        )
        z_vel = -0.2
        coef = 1
        if data_pose_1.position.z < 1.2:
            z_vel = 0
        vel_cmd = Twist()
        '''
        if action == 0: #FORWARD
            vel_cmd.linear.x = self.speed_value
            vel_cmd.angular.z = 0.0
        elif action == 1: #LEFT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = self.speed_value
        elif action == 2: #RIGHT
            vel_cmd.linear.x = 0.05
            vel_cmd.angular.z = -self.speed_value
        elif action == 3: #Up
            vel_cmd.linear.z = self.speed_value
            vel_cmd.angular.z = 0.0
        elif action == 4: #Down
            vel_cmd.linear.z = -self.speed_value
            vel_cmd.angular.z = 0.0
        '''
        vel_cmd.linear.x = action[0] * coef
        vel_cmd.linear.y = action[1] * coef
        vel_cmd.linear.z = z_vel
        vel_cmd.angular.x = 0.0
        vel_cmd.angular.y = 0.0
        vel_cmd.angular.z = 0.0
        self.act = action

        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step)
        data_pose, data_vel, data_pr, data_bs, data_imu = self.take_observation(
        )
        self.set_gimbal(data_bs, data_pose)
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        state, reward, done = self.process_data(data_pose, data_vel, data_pr,
                                                data_bs, data_imu)

        # Promote going forwards instead if turning
        '''
        if action == 0:
            reward += 100
        elif action == 1 or action == 2:
            reward -= 50
        elif action == 3:
            reward -= 150
        else:
            reward -= 50
        '''
        return state, reward, done, {}

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose = rospy.wait_for_message('/drone/gt_pose',
                                                   Pose,
                                                   timeout=5)
            except:
                continue
                #rospy.loginfo("Current drone pose not ready yet, retrying for getting robot pose")

        data_vel = None
        while data_vel is None:
            try:
                data_vel = rospy.wait_for_message('/drone/gt_vel',
                                                  Twist,
                                                  timeout=5)
            except:
                continue
                #rospy.loginfo("Current drone vel not ready yet, retrying for getting robot vel")
        data_pr = 0
        '''
        data_pose_t=None
        while data_pose_t is None:
            try:
                data_pose_t = rospy.wait_for_message('/tag_detections', aprmsg, timeout=5)
                if len(data_pose_t.detections)!=0:
                    data_pose= data_pose_t.detections[0].pose.pose.pose
                else:
                    continue
            except:
                rospy.loginfo("waiting for april tag message")
        '''
        data_bs = None
        while data_bs is None:
            try:
                data_bs = rospy.wait_for_message('/gazebo/model_states',
                                                 ModelStates,
                                                 timeout=5)
            except:
                continue
                #rospy.loginfo("Current base imu not ready yet, retrying for getting base values")
        data_imu = None
        '''
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/drone/imu', Imu, timeout=5)
            except:
                #continue
                rospy.loginfo("Current drone imu not ready yet, retrying for getting robot imu")
        '''
        return data_pose, data_vel, data_pr, data_bs, data_imu

    def set_gimbal(self, data_bs, data_pose):
        base_pose = data_bs.pose[2].position
        drone_pose = data_pose.position
        x = drone_pose.x - base_pose.x
        y = drone_pose.y - base_pose.y
        z = drone_pose.z - base_pose.z
        pitch = np.arctan(y / z) * 180 / math.pi
        yaw = np.arctan(x / z) * 180 / math.pi
        gimmsg = Gimbal()
        gimmsg.pitch = pitch
        gimmsg.yaw = yaw
        gimmsg.roll = 0
        self.gimbal_pub.publish(gimmsg)

    def calculate_dist_between_two_Points(self, p_init, p_end):
        a = np.array((p_init.x, p_init.y, p_init.z))
        b = np.array((p_end.x, p_end.y, p_end.z))

        dist = np.linalg.norm(a - b)

        return dist

    def init_desired_pose(self):

        current_init_pose, imu = self.take_observation()

        self.best_dist = self.calculate_dist_between_two_Points(
            current_init_pose.position, self.desired_pose.position)

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz
        while (self.takeoff_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No susbribers to Takeoff yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Takeoff Publisher Connected")

        while (self.vel_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    def initial_flight(self):
        vel_cmd = Twist()
        vel_cmd.linear.x = 0
        vel_cmd.linear.y = 0
        vel_cmd.linear.z = 1
        vel_cmd.angular.x = 0.0
        vel_cmd.angular.y = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)

    def takeoff_sequence(self, seconds_taking_off=1):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_cmd_vel_commands()

        takeoff_msg = EmptyTopicMsg()
        rospy.loginfo("Taking-Off Start")
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        #t=time.time()
        #while(time.time()-t<4):
        #    self.initial_flight()
        rospy.loginfo("Taking-Off sequence completed")

    def improved_distance_reward(self, current_pose):
        current_dist = self.calculate_dist_between_two_Points(
            current_pose.position, self.desired_pose.position)
        #rospy.loginfo("Calculated Distance = "+str(current_dist))

        if current_dist < self.best_dist:
            reward = 100
            self.best_dist = current_dist
        elif current_dist == self.best_dist:
            reward = 0
        else:
            reward = -100
            #print "Made Distance bigger= "+str(self.best_dist)

        return reward

    def process_data(self, data_pos, data_vel, data_pr, data_bs, data_imu):
        base_pose = data_bs.pose[2].position
        base_vel = data_bs.twist[2].linear
        p_x = data_pos.position.x - base_pose.x
        p_y = data_pos.position.y - base_pose.y
        p_z = data_pos.position.z - base_pose.z
        v_x = data_vel.linear.x - base_vel.x
        v_y = data_vel.linear.y - base_vel.y
        self.plotpd = self.plotpd.append(
            {
                'p_x': p_x,
                'p_y': p_y,
                'v_x': v_x,
                'v_y': v_y,
                'time': time.time() - self.tinit
            },
            ignore_index=True)
        try:
            self.plotpd.to_csv('./data.csv', sep='\t', encoding='utf-8')
            #print(self.plotpd)
        except:
            print("failed")
        #self.pdlen = self.plotpd.shape[0]
        a_x = self.act[0]
        a_y = self.act[1]
        c = 0
        if abs(p_x) < 0.75 and abs(p_y) < 0.75 and p_z < 1.2:
            c = 1
        self.pr = self.pr + c
        shaping = -100 * np.sqrt(p_x**2 + p_y**2) - 10 * np.sqrt(
            v_x**2 + v_y**2) - np.sqrt(
                a_x**2 + a_y**2) + 10 * c * (2 - abs(a_x) - abs(a_y))
        self.shapestore.append(shaping)
        reward = self.shapestore[-1] - self.shapestore[-2]
        state = np.array([p_x, p_y, p_z, v_x, v_y, c])
        #if c==1:
        #    print("positions: ",p_x,p_y,p_z)
        done = False
        #euler = tf.transformations.euler_from_quaternion([data_imu.orientation.x,data_imu.orientation.y,data_imu.orientation.z,data_imu.orientation.w])
        #roll = euler[0]
        #pitch = euler[1]
        #yaw = euler[2]

        #pitch_bad = not(-self.max_incl < pitch < self.max_incl)
        #roll_bad = not(-self.max_incl < roll < self.max_incl)
        altitude_bad_1 = data_pos.position.z > self.max_altitude
        altitude_bad_2 = data_pos.position.z < self.min_altitude
        x_bad = abs(data_pos.position.x) > self.max_x
        #x_bad=False
        y_bad = abs(data_pos.position.y) > self.max_y

        if altitude_bad_1 or altitude_bad_2 or x_bad or y_bad:
            rospy.loginfo("(Drone flight status is wrong) >>> (" +
                          str(altitude_bad_2) + "," + str(x_bad) + "," +
                          str(y_bad) + "," + str(self.pr) + ")")
            done = True
        return state, reward, done
예제 #25
0
class DroneController(gym.Env):

    # Initialize the commands
    def __init__(self):
        # Subscribe to the /ardrone/navdata topic, of message type navdata, and call self.ReceiveNavdata when a message is received
        self.subNavdata = rospy.Subscriber('/ardrone/navdata', Navdata, self.receive_navdata)
        self.subOdom = rospy.Subscriber('/ground_truth/state', Odometry, self.odom_callback)
        rospy.Subscriber('/vrpn_client_node/TestTed/pose', PoseStamped, self.optictrack_callback)

        # Allow the controller to publish to the /ardrone/takeoff, land and reset topics
        self.pubLand = rospy.Publisher('/ardrone/land', Empty, queue_size=0)
        self.pubTakeoff = rospy.Publisher('/ardrone/takeoff', Empty, queue_size=0)
        self.pubReset = rospy.Publisher('/ardrone/reset', Empty, queue_size=1)

        # Allow the controller to publish to the /cmd_vel topic and thus control the drone
        self.pubCommand = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # Put location into odometry
        self.location = Odometry()
        self.status = Navdata()
        self.real_loc = PoseStamped()
        # Gets parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.run_step = rospy.get_param("/run_step")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")
        self.exporation_noise = OUNoise()
        self.on_place = 0

        # initialize action space
        # Forward,Left,Right,Up,Down
        #self.action_space = spaces.Discrete(8)
        self.action_space = spaces.Box(np.array((-0.5, -0.5, -0.5, -0.5)), np.array((0.5, 0.5, 0.5, 0.5)))
        # Gazebo Connection
        self.gazebo = GazeboConnection()

        self._seed()
        # Land the drone if we are shutting down
        rospy.on_shutdown(self.send_land)

    # ---------------------- Basic Functions ---------------------- #
    # Receive the navigation data
    def receive_navdata(self, navdata):
        # Although there is a lot of data in this packet, we're only interested in the state at the moment
        self.status = navdata
        # print '[{0:.3f}] X: {1:.3f}'.format(navdata.header.stamp.to_sec(), navdata.vx)

    # call back odo data to subscriber
    def odom_callback(self, data):
        self.location = data.pose.pose

    # call back the position and orientation to subscriber
    def optictrack_callback(self, data):
        self.real_loc = data
        self.x_real = self.real_loc.pose.position.x  # along the short length of space
        self.y_real = self.real_loc.pose.position.y  # along the long hall
        self.z_real = self.real_loc.pose.position.z  # up and down
        self.imu_x_real = self.real_loc.pose.orientation.x
        self.imu_y_real = self.real_loc.pose.orientation.y
        self.imu_z_real = self.real_loc.pose.orientation.z
        self.imu_w_real = self.real_loc.pose.orientation.w
        # self.x_real, self.y_real = self.y_real, self.x_real
        euler = tf.transformations.euler_from_quaternion(
            [self.imu_x_real, self.imu_y_real, self.imu_z_real, self.imu_w_real])
        roll = euler[0]  # pitch
        pitch = euler[1]  # roll
        yaw = euler[2]  # yaw

        rospy.loginfo(("pitch: " + str(roll)))
    # take observation for pose and orientation
    # orientation is used for calculate euler angle
    def take_observation(self):

        data_pose = self.location.position
        data_imu = self.location.orientation
        state = self.status
        return data_pose, data_imu, state

    # Takeoff
    def send_takeoff(self):
        # Send a takeoff message to the ardrone driver
        self.reset_action()
        time.sleep(0.5)
        self.pubTakeoff.publish(Empty())

    # Send Land Message
    def send_land(self):
        # Send a landing message to the ardrone driver
        # Note we send this in all states, landing can do no harm
        self.reset_action()
        time.sleep(0.5)
        self.pubLand.publish(Empty())

    # Send emergency message to drone
    def send_emergency(self):
        # Send an emergency (or reset) message to the ardrone driver
        self.pubReset.publish(Empty())

    # Take Action
    def take_action(self, roll, pitch, z_velocity, yaw_velocity=0, something1=0, something2=0):

        # Called by the main program to set the current command
        command = Twist()
        command.linear.x = pitch  # go y direction / green axis
        command.linear.y = roll  # go x direction / red axis
        command.linear.z = z_velocity  # Spinning anti-clockwise
        command.angular.z = yaw_velocity  # go z direction / blue axis
        command.angular.x = something1
        command.angular.y = something2

        self.pubCommand.publish(command)

    # ---------------------- Initialize Simulation ---------------------- #
    # reset command action and takeoff the drone
    def takeoff_sequence(self, seconds_taking_off=2):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        self.reset_action()

        # wait for 1 seconds
        time.sleep(1)
        rospy.loginfo("Taking-Off Start")
        self.send_takeoff()
        time.sleep(seconds_taking_off)
        rospy.loginfo("Taking-Off sequence completed")

    # Check if any topic is published
    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz
        while self.pubTakeoff.get_num_connections() == 0:
            rospy.loginfo("No susbribers to Takeoff yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Takeoff Publisher Connected")

        while self.pubCommand.get_num_connections() == 0:
            rospy.loginfo("No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    # Reset the action
    def reset_action(self):
        self.take_action(0, 0, 0)

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Reset the simulation
    def _reset(self):
        # reset action
        self.reset_action()
        self.send_land()
        time.sleep(0.6)
        self.exporation_noise.reset()
        time.sleep(1)
        # reset the pose on the simulation
        self.gazebo.resetSim()
        self.gazebo.set_location()

        self.gazebo.unpauseSim()
        # check any topic published
        self.check_topic_publishers_connection()

        # takeoff the drone
        self.takeoff_sequence()

        # get the distance from origin to desire position before takeoff
        self.init_desired_pose()

        # take observation
        data_pose, data_imu, vel = self.take_observation()

        # observe the x position for now
        pos_x, pos_y, pos_z = data_pose.x, data_pose.y, data_pose.z

        pos = [pos_x, pos_y, pos_z]

        state = np.concatenate((pos, [1. if self.on_place else 0.]))
        #self.gazebo.pauseSim()
        return state

    # ---------------------- Action Processing ---------------------- #
    # take an action from action space [0,5]
    def _step(self, action):

        if action == 0:  # FORWARD
            self.take_action(0, self.speed_value, 0)
        elif action == 1:  # BACKWARD
            self.take_action(0, -self.speed_value, 0)
        elif action == 2:  # LEFT
            self.take_action(self.speed_value, 0, 0)
        elif action == 3:  # RIGHT
            self.take_action(-self.speed_value, 0, 0)
        elif action == 4:  # Up
            self.take_action(0, 0, self.speed_value)
        elif action == 5:  # Down
            self.take_action(0, 0, -self.speed_value)
        elif action == 6:
            self.take_action(0, 0, 0, 0)
        #self.take_action(action[0], action[1], action[2], action[3])
        time.sleep(self.run_step)

        # get the now state
        self.data_pose, self.data_imu, self.vel = self.take_observation()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(self.data_pose, self.data_imu)

        current_dist, loc_dist = self.calculate_dist_between_two_Points(self.data_pose, self.desired_pose.position)

        if current_dist <= 0.35:
            reward += 10
            self.on_place += 1
            if self.on_place > 70:
                done = True
                reward += 50
        elif current_dist > 5:
            reward -= 25
            self.on_place = 0
        else:
            reward += 5 - (current_dist / 0.2)
            self.on_place = 0
        if (loc_dist[0] < 0.4 or loc_dist[1] < 0.4 or loc_dist[2] < 0.4):
            reward += 1
        elif (loc_dist[0] < 0.1 and loc_dist[1] < 0.1 and loc_dist[2] < 0.1):
            reward += 5

        pos_x, pos_y, pos_z = self.data_pose.x, self.data_pose.y, self.data_pose.z

        pos = [pos_x, pos_y, pos_z]

        state = np.concatenate((pos, [1. if self.on_place else 0.]))
        return state, reward, done, {}

    # calculate the distance between two location
    def calculate_dist_between_two_Points(self, p_init, p_end):
        a = np.array((p_init.x, p_init.y, p_init.z))
        b = np.array((p_end.x, p_end.y, p_end.z))

        dist_loc = abs(b - a)
        dist = np.linalg.norm(a - b)

        return dist, dist_loc

    # initialize the desired pose
    def init_desired_pose(self):

        current_init_pose, imu, vel = self.take_observation()

        self.best_dist = self.calculate_dist_between_two_Points(current_init_pose, self.desired_pose.position)

    # imporve the reward if the drone keep in desire distance
    def improved_distance_reward(self, current_pose):
        current_dist = self.calculate_dist_between_two_Points(current_pose, self.desired_pose.position)
        # rospy.loginfo("Calculated Distance = "+str(current_dist))

        if current_dist < self.best_dist:
            reward = 10
            self.best_dist = current_dist
        elif current_dist == self.best_dist:
            reward = 0
        else:
            reward = -10
            # print "Made Distance bigger= "+str(self.best_dist)

        return reward

    def process_data(self, data_position, data_imu):

        done = False

        euler = tf.transformations.euler_from_quaternion(
            [data_imu.x, data_imu.y, data_imu.z, data_imu.w])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        pitch_bad = not (-self.max_incl < pitch < self.max_incl)
        roll_bad = not (-self.max_incl < roll < self.max_incl)
        altitude_too_high = data_position.z > self.max_altitude
        altitude_too_low = data_position.z < 0.3

        if altitude_too_high or pitch_bad or roll_bad or altitude_too_low:
            rospy.loginfo("(Drone flight status is wrong) >>> (" + str(altitude_too_high) + "," + str(pitch_bad) +
                          "," + str(roll_bad) + "," + str(altitude_too_low) + ")")
            done = True
            self.on_place = 0
            reward = -100
        else:
            reward = self.improved_distance_reward(data_position)
            #reward = 0
        return reward, done
예제 #26
0
class SimplicityEnv(gym.Env):

    def __init__(self):
        
        # We assume that a ROS node has already been created
        # before initialising the environment

        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_height = rospy.get_param("/max_height")
        self.min_height = rospy.get_param("/min_height")
        self.joint_increment_value = rospy.get_param("/joint_increment_value")
        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")
        self.desired_force = rospy.get_param("/desired_force")
        self.desired_yaw = rospy.get_param("/desired_yaw")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.weight_r4 = rospy.get_param("/weight_r4")
        self.weight_r5 = rospy.get_param("/weight_r5")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="simplicity")

        self.simplicity_state_object = SimplicityState(   max_height=self.max_height,
                                                    min_height=self.min_height,
                                                    abs_max_roll=self.max_incl,
                                                    abs_max_pitch=self.max_incl,
                                                    joint_increment_value=self.joint_increment_value,
                                                    done_reward=self.done_reward,
                                                    alive_reward=self.alive_reward,
                                                    desired_force=self.desired_force,
                                                    desired_yaw=self.desired_yaw,
                                                    weight_r1=self.weight_r1,
                                                    weight_r2=self.weight_r2,
                                                    weight_r3=self.weight_r3,
                                                    weight_r4=self.weight_r4,
                                                    weight_r5=self.weight_r5
                                                )

        self.simplicity_state_object.set_desired_world_point(self.desired_pose.position.x,
                                                          self.desired_pose.position.y,
                                                          self.desired_pose.position.z)

        self.simplicity_joint_pubisher_object = JointPub()
        


        """
        For this version, we consider 6 actions
        1-2: Increment/Decrement front_left_shoulder
        3-4: Increment/Decrement front_left_thigh
        5-6: Increment/Decrement front_left_shank

        7-8: Increment/Decrement front_right_shoulder
        9-10: Increment/Decrement front_right_thigh
        11-12: Increment/Decrement front_right_shank

        13-14: Increment/Decrement back_left_shoulder
        15-16: Increment/Decrement back_left_thigh
        17-18: Increment/Decrement back_left_shank

        19-20: Increment/Decrement back_right_shoulder
        21-22: Increment/Decrement back_right_thigh
        23-24: Increment/Decrement back_right_shank
        """
        self.action_space = spaces.Discrete(24)
        self.reward_range = (-np.inf, np.inf)

        self.seed()

    # A function to initialize the random generator
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
        
    # Resets the state of the environment and returns an initial observation.
    def reset(self):

        # 0st: We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetSim()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        rospy.logdebug("reset_robot_joint_controllers...")
        self.controllers_object.reset_simplicity_joint_controllers()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_pose...")
        self.simplicity_joint_pubisher_object.set_init_pose()

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        rospy.logdebug("check_all_systems_ready...")
        self.simplicity_state_object.check_all_systems_ready()
        rospy.logdebug("get_observations...")
        observation = self.simplicity_state_object.get_observations()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state

    def step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented
        next_action_position = self.simplicity_state_object.get_action_to_position(action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        self.simplicity_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        observation = self.simplicity_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        reward,done = self.simplicity_state_object.process_data()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state(observation)

        return state, reward, done, {}

    def get_state(self, observation):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        return self.simplicity_state_object.get_state_as_string(observation)
예제 #27
0
class QuadCopterEnv(gym.Env):
    def __init__(self, debug):

        # We assume that a ROS node has already been created
        # before initialising the environment

        self.vel_pub = rospy.Publisher('/drone_1/cmd_vel', Twist, queue_size=5)
        self.takeoff_pub = rospy.Publisher('/drone_1/takeoff',
                                           EmptyTopicMsg,
                                           queue_size=0)
        self.pose_pub = rospy.Publisher('/drone_1/command/pose',
                                        PoseStamped,
                                        queue_size=5)

        # gets training parameters from param server
        self.speed_value = rospy.get_param("/speed_value")
        self.desired_pose = Pose()
        self.desired_pose.position.z = rospy.get_param("/desired_pose/z")
        self.desired_pose.position.x = rospy.get_param("/desired_pose/x")
        self.desired_pose.position.y = rospy.get_param("/desired_pose/y")
        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_altitude = rospy.get_param("/max_altitude")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        #self.action_space = spaces.Discrete(5) #Forward,Left,Right,Up,Down
        self.num_states = 3
        self.num_actions = 3
        self.reward_range = (-np.inf, np.inf)

        self._seed()

        self.goal_pose = [5, -5, 5]
        self.goal_threshold = 0.5
        self.goal_reward = 50
        self.prev_state = []

        # Spatial limits
        self.ceiling = 10
        self.floor = 0.3
        self.x_limit = 10
        self.y_limit = 10

        self.cmd_achieved = False
        self.best_dist = 0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def reset(self):

        # 1st: resets the simulation to initial values
        self.gazebo.resetSim()

        # 2nd: Unpauses simulation
        self.gazebo.unpauseSim()

        # 3rd: resets the robot to initial conditions
        self.check_topic_publishers_connection()
        self.init_desired_pose()
        self.takeoff_sequence()

        # 4th: takes an observation of the initial condition of the robot
        data_pose, imu_pose = self.take_observation()
        observation = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]
        self.prev_state = observation

        # 5th: pauses simulation
        self.gazebo.pauseSim()

        return observation

    def step(self, action):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, we decide which velocity command corresponds
        vel_cmd = Twist()
        vel_cmd.linear.x = action[0]
        vel_cmd.linear.y = action[1]
        vel_cmd.linear.z = action[2]

        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.gazebo.unpauseSim()
        self.vel_pub.publish(vel_cmd)
        time.sleep(self.running_step)

        self.cmd_achieved = False
        #count = 0
        '''while self.cmd_achieved == False:
            self.vel_pub.publish(vel_cmd)
            self.cmd_achieved = self.velocity_check(vel_cmd)
            count += 1
            if count > 100:
                break
            #print("stuck here")'''

        data_pose, data_imu = self.take_observation()
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, done = self.process_data(data_pose)

        # Promote going forwards instead if turning

        state = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]
        self.prev_state = state
        return state, reward, done, {}

    def take_observation(self):
        data_pose = None
        while data_pose is None:
            try:
                data_pose_raw = rospy.wait_for_message(
                    '/drone_1/ground_truth_to_tf/pose',
                    PoseStamped,
                    timeout=10)
                data_pose = data_pose_raw.pose  # Equals to pose_ in rohit-s-murthy's code
            except:
                rospy.loginfo(
                    "Current drone pose not ready yet, retrying for getting robot pose"
                )

        data_imu = None
        while data_imu is None:
            try:
                data_imu = rospy.wait_for_message('/drone_1/raw_imu',
                                                  Imu,
                                                  timeout=10)
            except:
                rospy.loginfo(
                    "Current drone imu not ready yet, retrying for getting robot imu"
                )

        return data_pose, data_imu

    def init_desired_pose(self):

        current_init_pose, current_init_imu = self.take_observation()

        self.best_dist = self._distance(current_init_pose)

    def check_topic_publishers_connection(self):

        rate = rospy.Rate(10)  # 10hz

        while (self.vel_pub.get_num_connections() == 0):
            rospy.loginfo(
                "No susbribers to Cmd_vel yet so we wait and try again")
            rate.sleep()
        rospy.loginfo("Cmd_vel Publisher Connected")

    def reset_cmd_vel_commands(self):
        # We send an empty null Twist
        vel_cmd = Twist()
        vel_cmd.linear.z = 0.0
        vel_cmd.angular.z = 0.0
        self.vel_pub.publish(vel_cmd)

    def takeoff_sequence(self, seconds_taking_off=1):
        # Before taking off be sure that cmd_vel value there is is null to avoid drifts
        #self.reset_cmd_vel_commands()
        '''takeoff_msg = EmptyTopicMsg()
        rospy.loginfo( "Taking-Off Start")
        self.takeoff_pub.publish(takeoff_msg)
        time.sleep(seconds_taking_off)
        rospy.loginfo( "Taking-Off sequence completed")'''
        # rate = rospy.Rate(10)

        # while not rospy.is_shutdown():
        """while count < 5:
            msg.linear.z = 0.5
            # rospy.loginfo('Lift off')

            self.pose_pub.publish(msg)
            count = count + 1
            time.sleep(1.0)

        msg.linear.z = 0.0
        self.vel_pub.publish(msg)"""
        position_command = PoseStamped()
        position_command.pose.position.x = 0
        position_command.pose.position.y = 0
        position_command.pose.position.z = 5
        # The frame_id should be modified with the group ns
        position_command.header.frame_id = "drone_1/world"

        data_pose, imu_pose = self.take_observation()
        while abs(data_pose.position.z -
                  position_command.pose.position.z) > 0.1:
            data_pose, imu_pose = self.take_observation()
            self.pose_pub.publish(position_command)
            #time.sleep(1.0)
        print("Taking-Off sequence completed")

    def process_data(self, data_pose):

        done = False
        """euler = tf.transformations.euler_from_quaternion([data_imu.orientation.x,data_imu.orientation.y,data_imu.orientation.z,data_imu.orientation.w])
        roll = euler[0]
        pitch = euler[1]
        yaw = euler[2]

        pitch_bad = not(-self.max_incl < pitch < self.max_incl)
        roll_bad = not(-self.max_incl < roll < self.max_incl)
        altitude_bad = data_position.position.z > self.max_altitude

        if altitude_bad or pitch_bad or roll_bad:
            rospy.loginfo ("(Drone flight status is wrong) >>> ("+str(altitude_bad)+","+str(pitch_bad)+","+str(roll_bad)+")")
            done = True
            reward = -200
        else:
            reward, reached_goal = self.get_reward(data_pose)
            if reached_goal:
                print('Reached Goal!')
                done = True"""

        reward, reached_goal = self.get_reward(data_pose)
        if reached_goal:
            print('Reached Goal!')
            done = True

        # Check spatial limits: if fly out of the limit, episode finished
        current_pose = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]
        #if current_pose[2] < self.floor:
        #    reward -= 50
        #    done = True
        if abs(current_pose[0]) > self.x_limit or abs(
                current_pose[1]) > self.y_limit or current_pose[
                    2] > self.ceiling or current_pose[2] < self.floor:
            reward -= 10
            done = True
        else:
            reward += 1

        return reward, done

    # Now the reward just related to the current_pose and goal
    def get_reward(self, data_pose):
        reward = 0
        reached_goal = False

        error = self._distance(data_pose)
        current_pose = [
            data_pose.position.x, data_pose.position.y, data_pose.position.z
        ]

        if error < 1:  #self.goal_threshold
            reward += 50  #self.goal_reward
            reached_goal = True
        else:
            if error >= self.best_dist:
                #reward -= abs(self.best_dist - error)
                reward -= 1
            else:
                reward += 1
                #reward += abs(self.best_dist - error)
                self.best_dist = error

            #reward += np.linalg.norm(np.subtract(self.prev_state, self.goal_pose)) - np.linalg.norm(np.subtract(current_pose, self.goal_pose))

        return reward, reached_goal

    # Calculate the distance
    def _distance(self, data_pose):
        current_pose = [data_pose.position.x, data_pose.position.y, 5]

        err = np.subtract(current_pose, self.goal_pose)
        w = np.array([1, 1, 4])
        err = np.multiply(w, err)
        dist = np.linalg.norm(err)
        return dist

    # Check if the command velocity is achieved
    def velocity_check(self, vel_cmd):
        cmd_achieved = False

        #current_vel_raw = rospy.wait_for_message('/drone_1/ground_truth/state', Odometry, timeout=10)
        current_vel_raw = rospy.wait_for_message('/drone_1/fix_velocity',
                                                 Vector3Stamped,
                                                 timeout=10)
        #current_vel = current_vel_raw.twist.twist
        current_vel = current_vel_raw.vector
        vx_errer = vel_cmd.linear.x - current_vel.x
        vy_errer = vel_cmd.linear.y - current_vel.y
        vz_errer = vel_cmd.linear.z - current_vel.z

        if abs(vx_errer) < 0.05 and abs(vy_errer) < 0.05 and abs(
                vz_errer) < 0.05:
            #print("Cmd achieved!")
            cmd_achieved = True

        return cmd_achieved
예제 #28
0
class RobotGazeboEnv(gym.Env):
    def __init__(self,
                 robot_name_space,
                 controllers_list,
                 reset_controls,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION"):

        # To reset Simulations
        rospy.logdebug("START init RobotGazeboEnv")
        self.gazebo = GazeboConnection(start_init_physics_parameters,
                                       reset_world_or_sim)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        self.reward_pub = rospy.Publisher('/openai/reward',
                                          RLExperimentInfo,
                                          queue_size=1)
        rospy.logdebug("END init RobotGazeboEnv")

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        rospy.logdebug("START STEP OpenAIROS")

        self.gazebo.unpauseSim()
        self._set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self.cumulated_episode_reward += reward

        rospy.logdebug("END STEP OpenAIROS")

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting RobotGazeboEnvironment")
        self._reset_sim()
        self._init_env_variables()
        self._update_episode()
        obs = self._get_obs()
        rospy.logdebug("END Reseting RobotGazeboEnvironment")
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")

    def _update_episode(self):
        """
        Publishes the cumulated reward of the episode and 
        increases the episode number by one.
        :return:
        """
        rospy.logwarn("PUBLISHING REWARD...")
        self._publish_reward_topic(self.cumulated_episode_reward,
                                   self.episode_num)
        rospy.logwarn("PUBLISHING REWARD...DONE=" +
                      str(self.cumulated_episode_reward) + ",EP=" +
                      str(self.episode_num))

        self.episode_num += 1
        self.cumulated_episode_reward = 0

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------
    def _reset_sim(self):
        """Resets a simulation
        """
        rospy.logdebug("RESET SIM START")
        if self.reset_controls:
            rospy.logdebug("RESET CONTROLLERS")
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        else:
            rospy.logwarn("DONT RESET CONTROLLERS")
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        rospy.logdebug("RESET SIM END")
        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
예제 #29
0
class LaelapsEnvEllipse(gym.Env):
    def __init__(self):

        # fixed frame is nearly on the origin of the ground 0,0,0.6 at laelaps body center of mass
        self.last_base_position = [0, 0, 0]
        self.distance_weight = 1
        self.drift_weight = 2
        self.time_step = 0.001  # default Gazebo simulation time step

        self.episode_number = 0

        self.frames = 0

        self.torques_step = []
        self.euler_angles = []
        self.euler_rates = []
        self.base_zaxis = []
        self.base_x_y = []
        self.sim_t = []
        self.saving_option = False

        # Rospy get parameters from config file:
        self.laelaps_model_number = rospy.get_param("/laelaps_model_number")
        self.ramp_model_number = rospy.get_param("/ramp_model_number")
        self.ramp_available = rospy.get_param("/ramp_available")
        self.env_goal = rospy.get_param("/env_goal")

        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(
            namespace="laelaps_robot", controllers_list=controllers_list)

        # _______________SUBSCRIBERS__________________________________________________

        # give base position and quaternions
        rospy.Subscriber(gazebo_model_states_topic, ModelStates,
                         self.models_callback)
        # give motor angle, velocity, torque
        rospy.Subscriber(joint_state_topic, JointState,
                         self.joints_state_callback)
        rospy.Subscriber("/clock", Clock, self.clock_callback)

        rospy.Subscriber(rf_toe_nan_topic, Bool, self.rf_toe_nan_callback)
        rospy.Subscriber(rh_toe_nan_topic, Bool, self.rh_toe_nan_callback)
        rospy.Subscriber(lf_toe_nan_topic, Bool, self.lf_toe_nan_callback)
        rospy.Subscriber(lh_toe_nan_topic, Bool, self.lh_toe_nan_callback)

        # _______________MOTOR PUBLISHERS__________________________________________________

        self.motor_pub = list()

        for joint in motor_joints:
            joint_controller = "/laelaps_robot/" + str(joint) + "/command"
            x = rospy.Publisher(joint_controller, Float64, queue_size=1)
            self.motor_pub.append(x)
        #

        # _______________Toe PUBLISHERS__________________________________________________

        #  toe4_pos_publisher node : RH_foot: toe1 , RF_foot: toe2, LF_foot: toe3, LH_foot: toe4
        self.toe_pub = list()

        for idx in range(len(laelaps_feet)):
            toe_commands = "/laelaps_robot/toe" + str(idx + 1) + "/command"
            x = rospy.Publisher(toe_commands, Toe, queue_size=1)
            self.toe_pub.append(x)

        # ______________Defining observation and action space____________________________

        observation_high = (self.GetObservationUpperBound() + observation_eps)
        observation_low = (self.GetObservationLowerBound() - observation_eps)

        # Four legs toe x,y are estimated by RL
        low = [x_low] * 4
        low.extend([y_low] * 4)
        high = [x_high] * 4
        high.extend([y_high] * 4)
        self.action_space = spaces.Box(low=np.array(low),
                                       high=np.array(high),
                                       dtype=np.float32)

        # the epsilon to avoid 0 values entering the neural network in the algorithm
        observation_high = (self.GetObservationUpperBound() + observation_eps)
        observation_low = (self.GetObservationLowerBound() - observation_eps)

        self.observation_space = spaces.Box(observation_low,
                                            observation_high,
                                            dtype=np.float32)

        # ______________Reset and seed the environment____________________________
        self.seed()  # seed the environment in the initial function
        self.init_reset()  # reset the environment in the initial function

    def GetObservationUpperBound(self):
        upper_bound = np.zeros(6)  # 6 observation space dimension

        upper_bound[0:3] = [2 * math.pi] * 3  # roll,pitch yaw
        upper_bound[3:6] = [2 * math.pi / self.time_step] * \
            3  # Roll, pitch, yaw rate

        return upper_bound

    def GetObservationLowerBound(self):
        return -1 * self.GetObservationUpperBound()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

# _______________call back functions____________________

    def clock_callback(self, msg):
        global sim_time
        sim_time_s = msg.clock.secs
        sim_time_ns = msg.clock.nsecs * 1e-9
        sim_time = sim_time_s + sim_time_ns
        if episode_start:
            if self.saving_option:
                self.sim_t.append(sim_time)

    def models_callback(self, msg):
        self.base_pos = msg.pose[self.laelaps_model_number].position
        self.base_orientation = msg.pose[self.laelaps_model_number].orientation
        self.base_velocity = msg.twist[self.laelaps_model_number].linear
        self.base_angular_velocity = msg.twist[
            self.laelaps_model_number].angular
        if self.ramp_available:
            # both ramps have the same inclination
            self.ramp_inclination = msg.pose[
                self.ramp_model_number].orientation

        if episode_start:
            if self.saving_option:
                self.base_zaxis.append([self.base_pos.z, self.base_velocity.z])
                self.base_x_y.append([self.base_pos.x, self.base_pos.y])
                euler = self.transfrom_euler_from_quaternion([
                    self.base_orientation.x, self.base_orientation.y,
                    self.base_orientation.z, self.base_orientation.w
                ])
                self.euler_angles.append(euler)
                self.euler_rates.append([
                    self.base_angular_velocity.x, self.base_angular_velocity.y,
                    self.base_angular_velocity.z
                ])

    def GetLaelapsBaseInfo(self):
        p = self.base_pos
        q = self.base_orientation
        v = self.base_velocity
        w = self.base_angular_velocity
        base_rot_x = q.x
        base_rot_y = q.y
        base_rot_z = q.z
        base_rot_w = q.w
        base_orientation = [base_rot_x, base_rot_y, base_rot_z, base_rot_w]
        base_p_x = p.x
        base_p_y = p.y
        base_p_z = p.z
        base_pos = [base_p_x, base_p_y, base_p_z]
        base_v_x = v.x
        base_v_y = v.y
        base_v_z = v.z
        base_vel = [base_v_x, base_v_y, base_v_z]
        base_w_x = w.x
        base_w_y = w.y
        base_w_z = w.z
        base_angular_vel = [base_w_x, base_w_y, base_w_z]
        return base_pos, base_orientation, base_vel, base_angular_vel

    def GetRampInclination(self):
        ramp_q = self.ramp_inclination
        ramp_rot_x = ramp_q.x
        ramp_rot_y = ramp_q.y
        ramp_rot_z = ramp_q.z
        ramp_rot_w = ramp_q.w
        ramp_orientation = [ramp_rot_x, ramp_rot_y, ramp_rot_z, ramp_rot_w]
        euler = self.transfrom_euler_from_quaternion(ramp_orientation)
        ramp_inclination = euler[1]  # ramp inclination pitch (around y-axis)
        return ramp_inclination

    def joints_state_callback(self, msg):
        global joints_states
        joints_states = msg

    def GetMotorAngles(self):
        global joints_states
        motor_angles = joints_states.position
        return motor_angles

    def GetMotorVelocities(self):
        global joints_states
        motor_velocities = joints_states.velocity
        return motor_velocities

    def GetMotorTorques(self):
        global joints_states
        motor_torques = joints_states.effort
        return motor_torques
        if episode_start:
            if self.saving_option:
                self.torques_step.append(joints_states.effort)

    def rf_toe_nan_callback(self, msg):
        self.RF_toe_isnan = msg.data

    def rh_toe_nan_callback(self, msg):
        self.RH_toe_isnan = msg.data

    def lf_toe_nan_callback(self, msg):
        self.LF_toe_isnan = msg.data

    def lh_toe_nan_callback(self, msg):
        self.LH_toe_isnan = msg.data

    def GetNanToeCheck(self):
        return [
            self.RF_toe_isnan, self.RH_toe_isnan, self.LF_toe_isnan,
            self.LH_toe_isnan
        ]

    # _______________Quaternion to Euler conversion_____________________________________

    def transfrom_euler_from_quaternion(self, quaternion):
        # input quaternion should be list example: [0.06146124, 0, 0, 0.99810947]
        q = np.array(quaternion[:4], dtype=np.float64, copy=True)
        nq = np.dot(q, q)  # gives scalar
        if nq < eps:
            H = np.identity(4)  # identity matrix of 4x4
        q *= math.sqrt(2.0 / nq)
        q = np.outer(q, q)
        H = np.array(
            ((1.0 - q[1, 1] - q[2, 2], q[0, 1] - q[2, 3], q[0, 2] + q[1, 3],
              0.0), (q[0, 1] + q[2, 3], 1.0 - q[0, 0] - q[2, 2],
                     q[1, 2] - q[0, 3], 0.0),
             (q[0, 2] - q[1, 3], q[1, 2] + q[0, 3], 1.0 - q[0, 0] - q[1, 1],
              0.0), (0.0, 0.0, 0.0, 1.0)),
            dtype=np.float64)  # Homogenous transformation matrix

        # Obtain euler angles from Homogenous transformation matrix:
        # Note that many Euler angle triplets can describe one matrix.
        # take only first three rows and first three coloumns = rotation matrix
        M = np.array(H, dtype=np.float64, copy=False)[:3, :3]
        i = 0  # x-axes (first coloumn)
        j = 1  # y-axes
        k = 2  # z-axis
        cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
        if cy > eps:
            ax = math.atan2(M[k, j], M[k, k])
            ay = math.atan2(-M[k, i], cy)
            az = math.atan2(M[j, i], M[i, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(-M[k, i], cy)
            az = 0.0
        euler = [ax, ay, az]
        return euler  # roll, pitch ,yaw

    # __________________________________Get Observations____________________________________________________________
    def GetObservation(self):
        observation = []
        # returns base: position, orientation(in quaternion), linear velocity, angular velocity
        _, base_orientation_quaternion, _, euler_rate = self.GetLaelapsBaseInfo(
        )
        euler = self.transfrom_euler_from_quaternion(
            base_orientation_quaternion)
        observation.extend(euler)
        observation.extend(euler_rate)
        return observation

    # ____________________________________ROS wait_________________________________________________________________________________

    def ros_wait(self, t):
        # It makes the execution of the next command in the python-ROS node wait for t secs, equivalent to rospy.sleep(time)
        start = rospy.get_rostime()
        ros_time_start = start.secs + start.nsecs * 1e-9
        timeout = ros_time_start + t  # example 0.6 sec
        wait = True
        while wait:
            now = rospy.get_rostime()
            ros_time_now = now.secs + now.nsecs * 1e-9
            if ros_time_now >= timeout:
                wait = False

    # _______________Publish commands from ROS to Gazebo__________________________________________________________________________________

    def publish_threads_motor_angles(self, i, motor_angle):
        self.motor_pub[i].publish(motor_angle[i])

    def publish_motor_angles(self, motor_angle):

        threads = list()

        for index in range(8):
            # logging.info("Main    : create and start thread %d.", index)
            x = threading.Thread(target=self.publish_threads_motor_angles,
                                 args=(index, motor_angle))
            threads.append(x)
            x.start()

        for index, thread in enumerate(threads):
            # logging.info("Main    : before joining thread %d.", index)
            thread.join()
            # logging.info("Main    : thread %d done", index)

    def publish_threads_toe(self, i, toe_commands, phase):
        toe_class_msg.toex = toe_commands[i]  # toe_x
        toe_class_msg.toey = toe_commands[i + 4]  # toe_y
        toe_class_msg.phase = phase[i]
        self.toe_pub[i].publish(toe_class_msg)

    def pubilsh_toe_commands(self, toe_x_y, phase_shift):
        threads = list()

        for index in range(len(laelaps_feet)):
            # toe_class_msg=Toe()
            x = threading.Thread(target=self.publish_threads_toe,
                                 args=(index, toe_x_y, phase_shift))
            threads.append(x)
            x.start()

        for index, thread in enumerate(threads):
            thread.join()

    # _______________Reset function__________________________________________________________________________________
    def init_reset(self):
        global init_angles, init_toe_commands, init_phase_shift
        self.gazebo.unpauseSim()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        self.gazebo.resetJoints(init_angles)
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.pubilsh_toe_commands(init_toe_commands, init_phase_shift)
        # wait some time until robot stabilizes itself in the environment
        self.ros_wait(0.01)
        self.gazebo.pauseSim()
        self.episode_reward = 0
        return self.GetObservation()

    def reset(self):

        global init_angles, init_toe_commands, init_phase_shift, saving_path, episode_savingpath
        self.gazebo.unpauseSim()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        # rospy.loginfo("-----RESET-----")
        self.gazebo.resetJoints(init_angles)  # reset joint angles
        self.gazebo.unpauseSim()

        # Reset JoinStateControlers because resetSim doesnt reset TFs, generating issues with simulation time
        self.controllers_object.reset_controllers()

        self.pubilsh_toe_commands(init_toe_commands, init_phase_shift)
        self.ros_wait(0.01)

        self.gazebo.pauseSim()
        # self.plot_tensorboard("Episode Total Rewards", self.episode_reward,self.frames)
        # Initialize Episode Values
        # reset the variable base position to 0
        self.last_base_position = [0.0, 0.0, 0.0]
        self.episode_step = 0
        self.episode_reward = 0

        self.episode_number += 1
        episode_savingpath = str(saving_path) + \
            "/Episode_" + str(self.episode_number)
        if self.saving_option:
            os.makedirs(episode_savingpath)

        self.save_time("Start")

        return self.GetObservation()  # initial state S_0

    # _______________Step function__________________________________________________________________________________
    def step(self, action):

        global x_high, x_low, y_high, y_low, step_phase_shift
        # Action values from NN from [-1,1] are mapped to the action space limits
        action_x = interp(action[0:4], [-1, 1], [x_low, x_high])
        action_y = interp(action[4:8], [-1, 1], [y_low, y_high])
        action = np.append([action_x], [action_y])

        # rospy.loginfo("------Step Action: %s",action)
        time_out = rospy.get_time() + step_time_out

        while True:

            self.gazebo.unpauseSim()
            self.pubilsh_toe_commands(action, step_phase_shift)
            self.gazebo.pauseSim()
            done = self.termination()
            if rospy.get_time() > time_out or done == True:
                break
            else:
                continue

        self.gazebo.pauseSim()

        reward = self.reward()

        self.episode_reward += reward
        # rospy.loginfo("-------------EnvFram idx %s : Reward in Env per Step %s-------------", self.frames ,reward)
        # Tensorboard
        # self.plot_tensorboard("env step r", reward,self.frames)

        self.frames += 1
        self.episode_step += 1

        return self.GetObservation(), reward, done, {}

    # ______________Episode Termination________________________________________________________________________________
    def termination(self):
        global episode_start
        pos, quaternion, _, _ = self.GetLaelapsBaseInfo()

        roll, pitch, yaw = self.transfrom_euler_from_quaternion(quaternion)

        if self.ramp_available:
            # pitch angle of the ramp, inclined slopes goal 3.2 m
            ramp_pitch = self.GetRampInclination()
            is_fallen = math.fabs(roll) > 0.3 or math.fabs(
                pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs(
                    yaw
                ) > 0.2 or math.fabs(pos[0]) >= self.env_goal or math.fabs(
                    pos[1]) > 0.2  # 3.2 m is nearly the end of the ramp
        else:
            ramp_pitch = 0  # flat terrain goal 6 m
            is_fallen = math.fabs(roll) > 0.3 or math.fabs(
                pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs(
                    yaw) > 0.2 or math.fabs(
                        pos[0]) >= self.env_goal or math.fabs(
                            pos[1]) > 0.2 or math.fabs(pos[2]) > 0.17

        if is_fallen:
            self.save_time("Finish")  # episode finish
            self.save_termination([
                math.fabs(roll),
                math.fabs(pitch),
                math.fabs(yaw),
                math.fabs(pos[0]),
                math.fabs(pos[1])
            ])
            rospy.loginfo(
                "ROll: %s : %s , Pitch: %s : %s , Yaw: %s : %s , X: %s : %s , Y: %s : %s , Z: %s : %s",
                math.fabs(roll),
                math.fabs(roll) > 0.3, math.fabs(pitch),
                math.fabs(pitch) > 0.3 + math.fabs(ramp_pitch), math.fabs(yaw),
                math.fabs(yaw) > 0.2, math.fabs(pos[0]),
                math.fabs(pos[0]) >= 3.2, math.fabs(pos[1]),
                math.fabs(pos[1]) >= 0.2, math.fabs(pos[2]),
                math.fabs(pos[2]) > 0.17)
            # SAVING
            self.save_torques(self.torques_step, self.frames)
            self.save_baze_zaxis(self.base_zaxis, self.frames)
            self.save_baze_x_y(self.base_x_y, self.frames)
            self.save_euler_angles(self.euler_angles, self.frames)
            self.save_angular_velocities(self.euler_rates, self.frames)
            self.save_sim_time(self.sim_t, self.frames)
            self.torques_step = []
            self.euler_angles = []
            self.euler_rates = []
            self.base_zaxis = []
            self.base_x_y = []
            self.sim_t = []
            episode_start = False
        return is_fallen

    # _______________Reward function__________________________________________________________________________________
    def reward(self):

        nan_check = self.GetNanToeCheck()
        if any(nan_check):
            rospy.loginfo(
                "---Angles Inverse Kinematics: NaN %s, try again---r=-1",
                nan_check)
            reward = -1
        else:
            if self.ramp_available:
                ramp_pitch = self.GetRampInclination(
                )  # pitch angle of the ramp
            else:
                ramp_pitch = 0
            current_base_position, _, _, _ = self.GetLaelapsBaseInfo()

            # Reward forward motion
            forward_reward = math.fabs(
                current_base_position[0] / math.cos(ramp_pitch)) - math.fabs(
                    self.last_base_position[0] / math.cos(ramp_pitch))
            # rospy.loginfo("current_base_position[0] %s self.last_base_position[0] %s forward_reward %s",math.fabs(current_base_position[0]),math.fabs(self.last_base_position[0]),forward_reward )

            # Penalize drifting
            drift_reward = math.fabs(current_base_position[1]) - math.fabs(
                self.last_base_position[1])
            # rospy.loginfo("current_base_position[1] %s self.last_base_position[1] %s drift_reward %s",math.fabs(current_base_position[1]),math.fabs(self.last_base_position[1]),drift_reward )

            self.last_base_position = current_base_position

            reward = (self.distance_weight * forward_reward -
                      self.drift_weight * drift_reward)
        return reward

    # ______________Saving values and ploting tensorboard_____________________________________________-

    def tensorboardwriter(self, folder, use_tensorboardEnV,
                          use_tensorboardAlg):
        self.enable_tensorboard = use_tensorboardEnV
        if use_tensorboardEnV or use_tensorboardAlg:
            self.writer = SummaryWriter(folder)
        else:
            self.writer = []
        return self.writer

    def savingpath(self, folder, saving_option):
        global saving_path
        self.saving_option = saving_option
        saving_path = folder

    def plot_tensorboard(self, title, var1, var2):
        if self.enable_tensorboard:
            self.writer.add_scalar(title, var1, var2)

    def save_actions(self, action, step_number):
        global episode_savingpath
        if self.saving_option:
            action = np.array(action, dtype=np.float64)
            np.save(str(episode_savingpath) + "/" + str(step_number), action)

    def save_torques(self, torques, step_number):
        global episode_savingpath
        if self.saving_option:
            torques = np.array(torques, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/Torques_" + str(step_number),
                torques)

    def save_euler_angles(self, euler_angles, step_number):
        global episode_savingpath
        if self.saving_option:
            euler_angles = np.array(euler_angles, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/EulerAngles_" + str(step_number),
                euler_angles)

    def save_angular_velocities(self, angular_velocities, step_number):
        global episode_savingpath
        if self.saving_option:
            angular_velocities = np.array(angular_velocities, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/AngularVelocities_" +
                str(step_number), angular_velocities)

    def save_baze_zaxis(self, base_zaxis, step_number):
        global episode_savingpath
        if self.saving_option:
            base_zaxis = np.array(base_zaxis, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/BaseZaxis_" + str(step_number),
                base_zaxis)

    def save_baze_x_y(self, base_x_y, step_number):
        global episode_savingpath
        if self.saving_option:
            base_x_y = np.array(base_x_y, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/BaseX_Y" + str(step_number),
                base_x_y)

    def save_sim_time(self, sim_t, step_number):
        global episode_savingpath
        if self.saving_option:
            sim_t = np.array(sim_t, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/Sim_t" + str(step_number), sim_t)

    def save_time(self, start_or_finish):
        global episode_savingpath
        if self.saving_option:
            episode_time = str(time.ctime(time.time()))
            np.save(
                str(episode_savingpath) + "/Episode_" + str(start_or_finish),
                episode_time)

    def save_termination(self, termination_critiria):
        global episode_savingpath
        if self.saving_option:
            np.save(
                str(episode_savingpath) + "/termination", termination_critiria)

    def close(self):
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")
class multi_UAV_Env(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment
        self.set_link = rospy.ServiceProxy('/gazebo/set_link_state',
                                           SetLinkState)
        rospy.wait_for_service('/gazebo/set_link_state')
        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param(
            "/desired_pose/position/x")
        self.desired_pose.position.y = rospy.get_param(
            "/desired_pose/position/y")
        self.desired_pose.position.z = rospy.get_param(
            "/desired_pose/position/z")
        self.desired_pose.orientation.x = rospy.get_param(
            "/desired_pose/orientation/x")
        self.desired_pose.orientation.y = rospy.get_param(
            "/desired_pose/orientation/y")
        self.desired_pose.orientation.z = rospy.get_param(
            "/desired_pose/orientation/z")

        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_vel = rospy.get_param("/max_vel")
        self.min_vel = rospy.get_param("/min_vel")
        self.max_acc = rospy.get_param("/max_acc")
        self.min_acc = rospy.get_param("/min_acc")
        self.max_jerk = rospy.get_param("/max_jerk")
        self.min_jerk = rospy.get_param("/min_jerk")
        self.max_snap = rospy.get_param("/max_snap")
        self.min_snap = rospy.get_param("/min_snap")

        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")

        self.num_action_space = 15

        high = np.array([1. for _ in range(self.num_action_space)])
        low = np.array([-1. for _ in range(self.num_action_space)])
        self.action_space = spaces.Box(low=low, high=high)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="monoped")

        self.multi_uav_state_object = MultiUavState(
            max_vel=self.max_vel,
            min_vel=self.min_vel,
            max_acc=self.max_acc,
            min_acc=self.min_acc,
            max_jerk=self.max_jerk,
            min_jerk=self.min_jerk,
            max_snap=self.max_snap,
            min_snap=self.min_snap,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward)

        self.multi_uav_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z, self.desired_pose.orientation.x,
            self.desired_pose.orientation.y, self.desired_pose.orientation.z)

        self.mellingers = [
            MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35),
            MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35)
        ]
        """
        For this version, we consider 6 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5-6) Increment/Decrement kfe_joint
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        print("end of init...")

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the Simulator
        print("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        print("Reset SIM...")
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()

        # model_name = "four_quad_payload"
        # rospy.wait_for_service('/gazebo/delete_model')
        # del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel)
        # del_model_prox(model_name)

        # model_xml = rospy.get_param("robot_description")
        # pose = Pose()
        # pose.orientation.w = 1.0

        # spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        # req = SpawnModelRequest()
        # req.model_name = model_name
        # req.model_xml = model_xml
        # # should this be unique so ros_control can use each individually?
        # req.robot_namespace = "/foo"
        # req.initial_pose = pose
        # resp = spawn_model(req)

        self.gazebo.resetWorld()

        # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch')

        # initial_pose = Pose()
        # initial_pose.position.x = 0
        # initial_pose.position.y = 0
        # initial_pose.position.z = 0

        # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro')
        # urdf = f.read()
        # rospy.wait_for_service('gazebo/spawn_urdf_model')
        # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel)
        # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world")

        # for i in range(4):
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_imu_name = 'hummingbird_' + str(i) + '/imugt_link'
        #     self.set_link(LinkState(link_name=link_imu_name))
        #     for j in range(4):
        #         rospy.wait_for_service('/gazebo/set_link_state')
        #         link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j)
        #         print link_name
        #         self.set_link(LinkState(link_name=link_name))
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link'
        #     self.set_link(LinkState(link_name=link_odom_name))

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        print("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.8)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        # rospy.logdebug("reset_monoped_joint_controllers...")
        # self.controllers_object.reset_monoped_joint_controllers()

        # 3rd: resets the robot to initial conditions
        # print("set_init_pose...")
        # for controller in self.mellingers:
        #     motor_speed = np.zeros((4, 1))
        #     controller.set_desired_motor_speed(motor_speed)
        # controller.send_motor_command()

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        print("check_all_systems_ready...")
        self.multi_uav_state_object.check_all_systems_ready()
        # rospy.logdebug("get_observations...")
        # observation = self.multi_uav_state_object.get_states()

        # 6th: We restore the gravity to original
        print("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        print("Pause SIM...")
        self.gazebo.pauseSim()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state()

        return state

    def _step(self, actions):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented

        # next_action_position = self.multi_uav_state_object.get_action_to_position(action)

        # update the reference in the mellinger controller from the action
        for i, action in enumerate(actions):
            self.mellingers[i].set_ref_from_action(action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        # action to the gazebo environment
        for i_mellinger in self.mellingers:
            i_mellinger.publish_err()
            i_mellinger.update_current_state()
            i_mellinger.update_desired_values()
            i_mellinger.motorSpeedFromU()
            i_mellinger.send_motor_command()

        # self.monoped_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        # observation = self.multi_uav_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        # reward,done = self.multi_uav_state_object.process_data()

        reward, done = self.multi_uav_state_object.calculate_reward_payload_orientation(
        )

        # Get the State Discrete Stringuified version of the observations
        # state = self.get_state(observation)
        state = self.get_state()

        return state, reward, done

    def get_state(self):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        # return self.multi_uav_state_object.get_state_as_string(observation)

        return self.multi_uav_state_object.get_states()