class CartPole3DEnv(gym.Env): #class inherit from gym.Env.

    def __init__(self): # We initialize and define init function.
        number_actions = rospy.get_param('/cart_pole_3d/n_actions')
        self.action_space = spaces.Discrete(number_actions)
        self.state_size = 3

        self._seed()

        # get configuration parameters
        self.init_cart_vel = rospy.get_param('/cart_pole_3d/init_cart_vel')

        # Actions
        self.cart_speed_fixed_value = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value')

        self.start_point = Point()
        self.start_point.x = rospy.get_param("/cart_pole_3d/init_cube_pose/x")
        self.start_point.y = rospy.get_param("/cart_pole_3d/init_cube_pose/y")
        self.start_point.z = rospy.get_param("/cart_pole_3d/init_cube_pose/z")

        # Done
        self.max_angle = rospy.get_param('/cart_pole_3d/max_angle')
        self.max_distance = rospy.get_param('/cart_pole_3d/max_distance')

        # Rewards
        self.keep_pole_up_reward = rospy.get_param("/cart_pole_3d/keep_pole_up_reward")
        self.end_episode_points = rospy.get_param("/cart_pole_3d/end_episode_points")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_list = ['joint_state_controller',
                                 'cart_joint_velocity_controller'
                                 ]
        self.controllers_object = ControllersConnection(namespace="cart_pole_3d",
                                                        controllers_list=self.controllers_list)
        """
        Namespace of robot is the namespace in front of your controller.
        Controllers_list => that we want to reset at each time that we call the reset controllers.
          This case we just have two: joint_state_controller, inertia_wheel_roll_joint_velocity_controller
        We need to create a function which gets retrieves those controllers.
        """

        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()

        rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback)
        rospy.Subscriber("/cart_pole_3d/odom", Odometry, self.odom_callback)
        # rospy.Subscriber("/cart_pole_3d/imu", Imu, self.imu_callback)

        self._cart_velocity_publisher = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command',
                                             Float64, queue_size=1)

        self.check_publishers_connection()
        self.gazebo.pauseSim()

    def _seed(self, seed=None):  # overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):  # overriden function

        self.gazebo.unpauseSim()
        self.set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self.compute_reward(obs, done)
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs, reward, done, info

    def _reset(self):  # We are using a virtual function defined in the gym infrastructure.

        """
        Everytime we start episode, robot will be the same place and configurations. Because otherwise it won't
        learn correctly and we can't iterated. => basic stuff for Reinforcement learning.
        :return:
        """
        self.gazebo.unpauseSim()
        """
        why we need to unpauseSim because resetting controllers and for checking the sensors, we need the simulation
        to be running because otherwise we don't have any sensory data and we don't have access to the controller reset
        functions services they won't work and tell you to hit play. => it is very important.
        """
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.set_init_pose()
        #initialized robot
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.gazebo.pauseSim()
        self.init_env_variables()
        obs = self._get_obs()
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs

    def init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset at the start
        of an episode.
        :return:
        """
        self.total_distance_moved = 0.0
        self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point)
        self.cart_current_speed = rospy.get_param('/cart_pole_3d/init_cart_vel')

    def _is_done(self, observations):

        # MAximum distance to travel permited in meters from origin
        max_upper_distance = self.max_distance
        max_lower_distance = -self.max_distance
        max_angle_allowed = self.max_angle

        if (observations[1] > max_upper_distance or observations[1] < max_lower_distance):
            rospy.logerr("Cart Too Far==>" + str(observations[1]))
            done = True
        elif(observations[2] > max_angle_allowed or observations[2] < -max_angle_allowed):
            rospy.logerr("Pole excceed allowed angle =>" + str(observations[2]))
            done = True
        else:
            # rospy.loginfo("Cart NOT Too Far==>" + str(observations[1]))
            # rospy.loginfo("Pole still in range==>" + str(observations[2]))
            done = False

        return done

    def set_action(self, action):

        # We convert the actions to speed movements to send to the parent class CubeSingleDiskEnv
        if action == 0:  # Move left
            self.cart_current_speed = self.cart_speed_fixed_value
        elif action == 1:  # Move right
            self.cart_current_speed = -1*self.cart_speed_fixed_value

        # We clamp Values to maximum
        rospy.logdebug("cart_current_speed before clamp==" + str(self.cart_current_speed))
        self.cart_current_speed = numpy.clip(self.cart_current_speed,
                                          -1 * self.cart_speed_fixed_value,
                                          self.cart_speed_fixed_value)
        rospy.logdebug("cart_current_speed after clamp==" + str(self.cart_current_speed))

        self.move_joints(self.cart_current_speed)

    def _get_obs(self):
        """
        Here we define what sensor data defines our robots observations
        To know which Variables we have acces to, we need to read the
        MyCubeSingleDiskEnv API DOCS
        :return:
        """

        # We get the angle of the pole_angle
        pole_angle = round(self.joints.position[1],3)

        # We get the distance from the origin
        y_distance = self.get_y_dir_distance_from_start_point(self.start_point)

        # We get the current speed of the cart
        current_cart_speed_vel = self.get_cart_velocity()

        cube_observations = [
            round(current_cart_speed_vel, 1),
            round(y_distance, 1),
            round(pole_angle, 1)
        ]

        return cube_observations

    def get_cart_velocity(self):
        # We get the current joint roll velocity
        roll_vel = self.joints.velocity[1]
        return roll_vel

    def get_y_linear_speed(self):
        # We get the current joint roll velocity
        y_linear_speed = self.odom.twist.twist.linear.y
        return y_linear_speed

    def get_y_dir_distance_from_start_point(self, start_point):
        """
        Calculates the distance from the given point and the current position
        given by odometry. In this case the increase or decrease in y.
        :param start_point:
        :return:
        """
        y_dist_dir = self.odom.pose.pose.position.y - start_point.y

        return y_dist_dir

    def get_pole_angle(self):
        return self.imu.orientation.y

    def compute_reward(self, observations, done):
        speed = observations[0]
        distance = observations[1]
        angle = observations[2]

        # if not done:
        #     # positive for keeping the pole up
        #     reward_keep_pole_up = self.keep_pole_up_reward
        #     # nigative Reinforcement
        #     reward_distance = distance * -2
        #
        #     # positive reward on angle
        #     reward_angle = 10 / ((abs(angle) * 1)+1.1)
        #
        #     reward = reward_angle + reward_distance + reward_keep_pole_up
        #
        #     rospy.loginfo("Reward_distance=" + str(reward_distance))
        #     rospy.loginfo("Reward_angle= " + str(reward_angle))
        # else:
        #     reward = -1 * self.end_episode_points
        #
        # return reward
        # rospy.loginfo("pole_angle for reward==>" + str(angle))
        delta = 0.7 - abs(angle)
        reward_pole_angle = math.exp(delta*10)

        # If we are moving to the left and the pole is falling left is Bad
        # rospy.logwarn("pole_vel==>" + str(speed))
        pole_vel_sign = numpy.sign(speed)
        pole_angle_sign = numpy.sign(angle)
        # rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign))
        # rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign))

        # We want inverted signs for the speeds. We multiply by -1 to make minus positive.
        # global_sign + = GOOD, global_sign - = BAD
        base_reward = 500
        if speed != 0:
            global_sign = pole_angle_sign * pole_vel_sign * -1
            reward_for_efective_movement = base_reward * global_sign
        else:
            # Is a particular case. If it doesnt move then its good also
            reward_for_efective_movement = base_reward

        reward = reward_pole_angle + reward_for_efective_movement

        # rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement))
        return reward

    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data


    def check_all_sensors_ready(self):
        self.check_joint_states_ready()
        self.check_odom_ready()
        # self.check_imu_ready()
        rospy.logdebug("ALL SENSORS READY")

    def check_joint_states_ready(self):
        self.joints = None
        while self.joints is None and not rospy.is_shutdown():
            try:
                self.joints = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                # check response from this topic for 1 second. if don't received respone, it mean not ready.
                # Assure data channels are working perfectly.
                rospy.logdebug("Current cart_pole_3d/joint_states READY=>" + str(self.joints))

            except:
                rospy.logerr("Current cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")
        return self.joints

    def check_odom_ready(self):
        self.odom = None
        while self.odom is None and not rospy.is_shutdown():
            try:
                self.odom = rospy.wait_for_message("/cart_pole_3d/odom", Odometry, timeout=1.0)
                rospy.logdebug("Current /cart_pole_3d/odom READY=>" + str(self.odom))

            except:
                rospy.logerr("Current /cart_pole_3d/odom not ready yet, retrying for getting odom")

        return self.odom

    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._cart_velocity_publisher.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.logdebug("No susbribers to _cart_velocity_publisher yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.logdebug("_base_pub Publisher Connected")

        rospy.logdebug("All Publishers READY")

    def move_joints(self, cart_speed):
        joint_speed_value = Float64()
        joint_speed_value.data = cart_speed
        # rospy.logwarn("cart Velocity >>>>>>>" + str(joint_speed_value))
        self._cart_velocity_publisher.publish(joint_speed_value)

    def set_init_pose(self):
        """Sets the Robot in its init pose
        """
        self.move_joints(self.init_cart_vel)

        return True

    def convert_obs_to_state(self, observations):
        """
        Converts the observations used for reward and so on to the essentials for the robot state
        In this case we only need the orientation of the cube and the speed of the disc.
        The distance doesnt condition at all the actions
        """
        speed = observations[0]
        distance = observations[1]
        angle = observations[2]

        state_converted = [speed, distance, angle]

        return state_converted
Beispiel #2
0
class LaelapsEnvEllipse(gym.Env):
    def __init__(self):

        # fixed frame is nearly on the origin of the ground 0,0,0.6 at laelaps body center of mass
        self.last_base_position = [0, 0, 0]
        self.distance_weight = 1
        self.drift_weight = 2
        self.time_step = 0.001  # default Gazebo simulation time step

        self.episode_number = 0

        self.frames = 0

        self.torques_step = []
        self.euler_angles = []
        self.euler_rates = []
        self.base_zaxis = []
        self.base_x_y = []
        self.sim_t = []
        self.saving_option = False

        # Rospy get parameters from config file:
        self.laelaps_model_number = rospy.get_param("/laelaps_model_number")
        self.ramp_model_number = rospy.get_param("/ramp_model_number")
        self.ramp_available = rospy.get_param("/ramp_available")
        self.env_goal = rospy.get_param("/env_goal")

        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(
            namespace="laelaps_robot", controllers_list=controllers_list)

        # _______________SUBSCRIBERS__________________________________________________

        # give base position and quaternions
        rospy.Subscriber(gazebo_model_states_topic, ModelStates,
                         self.models_callback)
        # give motor angle, velocity, torque
        rospy.Subscriber(joint_state_topic, JointState,
                         self.joints_state_callback)
        rospy.Subscriber("/clock", Clock, self.clock_callback)

        rospy.Subscriber(rf_toe_nan_topic, Bool, self.rf_toe_nan_callback)
        rospy.Subscriber(rh_toe_nan_topic, Bool, self.rh_toe_nan_callback)
        rospy.Subscriber(lf_toe_nan_topic, Bool, self.lf_toe_nan_callback)
        rospy.Subscriber(lh_toe_nan_topic, Bool, self.lh_toe_nan_callback)

        # _______________MOTOR PUBLISHERS__________________________________________________

        self.motor_pub = list()

        for joint in motor_joints:
            joint_controller = "/laelaps_robot/" + str(joint) + "/command"
            x = rospy.Publisher(joint_controller, Float64, queue_size=1)
            self.motor_pub.append(x)
        #

        # _______________Toe PUBLISHERS__________________________________________________

        #  toe4_pos_publisher node : RH_foot: toe1 , RF_foot: toe2, LF_foot: toe3, LH_foot: toe4
        self.toe_pub = list()

        for idx in range(len(laelaps_feet)):
            toe_commands = "/laelaps_robot/toe" + str(idx + 1) + "/command"
            x = rospy.Publisher(toe_commands, Toe, queue_size=1)
            self.toe_pub.append(x)

        # ______________Defining observation and action space____________________________

        observation_high = (self.GetObservationUpperBound() + observation_eps)
        observation_low = (self.GetObservationLowerBound() - observation_eps)

        # Four legs toe x,y are estimated by RL
        low = [x_low] * 4
        low.extend([y_low] * 4)
        high = [x_high] * 4
        high.extend([y_high] * 4)
        self.action_space = spaces.Box(low=np.array(low),
                                       high=np.array(high),
                                       dtype=np.float32)

        # the epsilon to avoid 0 values entering the neural network in the algorithm
        observation_high = (self.GetObservationUpperBound() + observation_eps)
        observation_low = (self.GetObservationLowerBound() - observation_eps)

        self.observation_space = spaces.Box(observation_low,
                                            observation_high,
                                            dtype=np.float32)

        # ______________Reset and seed the environment____________________________
        self.seed()  # seed the environment in the initial function
        self.init_reset()  # reset the environment in the initial function

    def GetObservationUpperBound(self):
        upper_bound = np.zeros(6)  # 6 observation space dimension

        upper_bound[0:3] = [2 * math.pi] * 3  # roll,pitch yaw
        upper_bound[3:6] = [2 * math.pi / self.time_step] * \
            3  # Roll, pitch, yaw rate

        return upper_bound

    def GetObservationLowerBound(self):
        return -1 * self.GetObservationUpperBound()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

# _______________call back functions____________________

    def clock_callback(self, msg):
        global sim_time
        sim_time_s = msg.clock.secs
        sim_time_ns = msg.clock.nsecs * 1e-9
        sim_time = sim_time_s + sim_time_ns
        if episode_start:
            if self.saving_option:
                self.sim_t.append(sim_time)

    def models_callback(self, msg):
        self.base_pos = msg.pose[self.laelaps_model_number].position
        self.base_orientation = msg.pose[self.laelaps_model_number].orientation
        self.base_velocity = msg.twist[self.laelaps_model_number].linear
        self.base_angular_velocity = msg.twist[
            self.laelaps_model_number].angular
        if self.ramp_available:
            # both ramps have the same inclination
            self.ramp_inclination = msg.pose[
                self.ramp_model_number].orientation

        if episode_start:
            if self.saving_option:
                self.base_zaxis.append([self.base_pos.z, self.base_velocity.z])
                self.base_x_y.append([self.base_pos.x, self.base_pos.y])
                euler = self.transfrom_euler_from_quaternion([
                    self.base_orientation.x, self.base_orientation.y,
                    self.base_orientation.z, self.base_orientation.w
                ])
                self.euler_angles.append(euler)
                self.euler_rates.append([
                    self.base_angular_velocity.x, self.base_angular_velocity.y,
                    self.base_angular_velocity.z
                ])

    def GetLaelapsBaseInfo(self):
        p = self.base_pos
        q = self.base_orientation
        v = self.base_velocity
        w = self.base_angular_velocity
        base_rot_x = q.x
        base_rot_y = q.y
        base_rot_z = q.z
        base_rot_w = q.w
        base_orientation = [base_rot_x, base_rot_y, base_rot_z, base_rot_w]
        base_p_x = p.x
        base_p_y = p.y
        base_p_z = p.z
        base_pos = [base_p_x, base_p_y, base_p_z]
        base_v_x = v.x
        base_v_y = v.y
        base_v_z = v.z
        base_vel = [base_v_x, base_v_y, base_v_z]
        base_w_x = w.x
        base_w_y = w.y
        base_w_z = w.z
        base_angular_vel = [base_w_x, base_w_y, base_w_z]
        return base_pos, base_orientation, base_vel, base_angular_vel

    def GetRampInclination(self):
        ramp_q = self.ramp_inclination
        ramp_rot_x = ramp_q.x
        ramp_rot_y = ramp_q.y
        ramp_rot_z = ramp_q.z
        ramp_rot_w = ramp_q.w
        ramp_orientation = [ramp_rot_x, ramp_rot_y, ramp_rot_z, ramp_rot_w]
        euler = self.transfrom_euler_from_quaternion(ramp_orientation)
        ramp_inclination = euler[1]  # ramp inclination pitch (around y-axis)
        return ramp_inclination

    def joints_state_callback(self, msg):
        global joints_states
        joints_states = msg

    def GetMotorAngles(self):
        global joints_states
        motor_angles = joints_states.position
        return motor_angles

    def GetMotorVelocities(self):
        global joints_states
        motor_velocities = joints_states.velocity
        return motor_velocities

    def GetMotorTorques(self):
        global joints_states
        motor_torques = joints_states.effort
        return motor_torques
        if episode_start:
            if self.saving_option:
                self.torques_step.append(joints_states.effort)

    def rf_toe_nan_callback(self, msg):
        self.RF_toe_isnan = msg.data

    def rh_toe_nan_callback(self, msg):
        self.RH_toe_isnan = msg.data

    def lf_toe_nan_callback(self, msg):
        self.LF_toe_isnan = msg.data

    def lh_toe_nan_callback(self, msg):
        self.LH_toe_isnan = msg.data

    def GetNanToeCheck(self):
        return [
            self.RF_toe_isnan, self.RH_toe_isnan, self.LF_toe_isnan,
            self.LH_toe_isnan
        ]

    # _______________Quaternion to Euler conversion_____________________________________

    def transfrom_euler_from_quaternion(self, quaternion):
        # input quaternion should be list example: [0.06146124, 0, 0, 0.99810947]
        q = np.array(quaternion[:4], dtype=np.float64, copy=True)
        nq = np.dot(q, q)  # gives scalar
        if nq < eps:
            H = np.identity(4)  # identity matrix of 4x4
        q *= math.sqrt(2.0 / nq)
        q = np.outer(q, q)
        H = np.array(
            ((1.0 - q[1, 1] - q[2, 2], q[0, 1] - q[2, 3], q[0, 2] + q[1, 3],
              0.0), (q[0, 1] + q[2, 3], 1.0 - q[0, 0] - q[2, 2],
                     q[1, 2] - q[0, 3], 0.0),
             (q[0, 2] - q[1, 3], q[1, 2] + q[0, 3], 1.0 - q[0, 0] - q[1, 1],
              0.0), (0.0, 0.0, 0.0, 1.0)),
            dtype=np.float64)  # Homogenous transformation matrix

        # Obtain euler angles from Homogenous transformation matrix:
        # Note that many Euler angle triplets can describe one matrix.
        # take only first three rows and first three coloumns = rotation matrix
        M = np.array(H, dtype=np.float64, copy=False)[:3, :3]
        i = 0  # x-axes (first coloumn)
        j = 1  # y-axes
        k = 2  # z-axis
        cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
        if cy > eps:
            ax = math.atan2(M[k, j], M[k, k])
            ay = math.atan2(-M[k, i], cy)
            az = math.atan2(M[j, i], M[i, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(-M[k, i], cy)
            az = 0.0
        euler = [ax, ay, az]
        return euler  # roll, pitch ,yaw

    # __________________________________Get Observations____________________________________________________________
    def GetObservation(self):
        observation = []
        # returns base: position, orientation(in quaternion), linear velocity, angular velocity
        _, base_orientation_quaternion, _, euler_rate = self.GetLaelapsBaseInfo(
        )
        euler = self.transfrom_euler_from_quaternion(
            base_orientation_quaternion)
        observation.extend(euler)
        observation.extend(euler_rate)
        return observation

    # ____________________________________ROS wait_________________________________________________________________________________

    def ros_wait(self, t):
        # It makes the execution of the next command in the python-ROS node wait for t secs, equivalent to rospy.sleep(time)
        start = rospy.get_rostime()
        ros_time_start = start.secs + start.nsecs * 1e-9
        timeout = ros_time_start + t  # example 0.6 sec
        wait = True
        while wait:
            now = rospy.get_rostime()
            ros_time_now = now.secs + now.nsecs * 1e-9
            if ros_time_now >= timeout:
                wait = False

    # _______________Publish commands from ROS to Gazebo__________________________________________________________________________________

    def publish_threads_motor_angles(self, i, motor_angle):
        self.motor_pub[i].publish(motor_angle[i])

    def publish_motor_angles(self, motor_angle):

        threads = list()

        for index in range(8):
            # logging.info("Main    : create and start thread %d.", index)
            x = threading.Thread(target=self.publish_threads_motor_angles,
                                 args=(index, motor_angle))
            threads.append(x)
            x.start()

        for index, thread in enumerate(threads):
            # logging.info("Main    : before joining thread %d.", index)
            thread.join()
            # logging.info("Main    : thread %d done", index)

    def publish_threads_toe(self, i, toe_commands, phase):
        toe_class_msg.toex = toe_commands[i]  # toe_x
        toe_class_msg.toey = toe_commands[i + 4]  # toe_y
        toe_class_msg.phase = phase[i]
        self.toe_pub[i].publish(toe_class_msg)

    def pubilsh_toe_commands(self, toe_x_y, phase_shift):
        threads = list()

        for index in range(len(laelaps_feet)):
            # toe_class_msg=Toe()
            x = threading.Thread(target=self.publish_threads_toe,
                                 args=(index, toe_x_y, phase_shift))
            threads.append(x)
            x.start()

        for index, thread in enumerate(threads):
            thread.join()

    # _______________Reset function__________________________________________________________________________________
    def init_reset(self):
        global init_angles, init_toe_commands, init_phase_shift
        self.gazebo.unpauseSim()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        self.gazebo.resetJoints(init_angles)
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.pubilsh_toe_commands(init_toe_commands, init_phase_shift)
        # wait some time until robot stabilizes itself in the environment
        self.ros_wait(0.01)
        self.gazebo.pauseSim()
        self.episode_reward = 0
        return self.GetObservation()

    def reset(self):

        global init_angles, init_toe_commands, init_phase_shift, saving_path, episode_savingpath
        self.gazebo.unpauseSim()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        # rospy.loginfo("-----RESET-----")
        self.gazebo.resetJoints(init_angles)  # reset joint angles
        self.gazebo.unpauseSim()

        # Reset JoinStateControlers because resetSim doesnt reset TFs, generating issues with simulation time
        self.controllers_object.reset_controllers()

        self.pubilsh_toe_commands(init_toe_commands, init_phase_shift)
        self.ros_wait(0.01)

        self.gazebo.pauseSim()
        # self.plot_tensorboard("Episode Total Rewards", self.episode_reward,self.frames)
        # Initialize Episode Values
        # reset the variable base position to 0
        self.last_base_position = [0.0, 0.0, 0.0]
        self.episode_step = 0
        self.episode_reward = 0

        self.episode_number += 1
        episode_savingpath = str(saving_path) + \
            "/Episode_" + str(self.episode_number)
        if self.saving_option:
            os.makedirs(episode_savingpath)

        self.save_time("Start")

        return self.GetObservation()  # initial state S_0

    # _______________Step function__________________________________________________________________________________
    def step(self, action):

        global x_high, x_low, y_high, y_low, step_phase_shift
        # Action values from NN from [-1,1] are mapped to the action space limits
        action_x = interp(action[0:4], [-1, 1], [x_low, x_high])
        action_y = interp(action[4:8], [-1, 1], [y_low, y_high])
        action = np.append([action_x], [action_y])

        # rospy.loginfo("------Step Action: %s",action)
        time_out = rospy.get_time() + step_time_out

        while True:

            self.gazebo.unpauseSim()
            self.pubilsh_toe_commands(action, step_phase_shift)
            self.gazebo.pauseSim()
            done = self.termination()
            if rospy.get_time() > time_out or done == True:
                break
            else:
                continue

        self.gazebo.pauseSim()

        reward = self.reward()

        self.episode_reward += reward
        # rospy.loginfo("-------------EnvFram idx %s : Reward in Env per Step %s-------------", self.frames ,reward)
        # Tensorboard
        # self.plot_tensorboard("env step r", reward,self.frames)

        self.frames += 1
        self.episode_step += 1

        return self.GetObservation(), reward, done, {}

    # ______________Episode Termination________________________________________________________________________________
    def termination(self):
        global episode_start
        pos, quaternion, _, _ = self.GetLaelapsBaseInfo()

        roll, pitch, yaw = self.transfrom_euler_from_quaternion(quaternion)

        if self.ramp_available:
            # pitch angle of the ramp, inclined slopes goal 3.2 m
            ramp_pitch = self.GetRampInclination()
            is_fallen = math.fabs(roll) > 0.3 or math.fabs(
                pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs(
                    yaw
                ) > 0.2 or math.fabs(pos[0]) >= self.env_goal or math.fabs(
                    pos[1]) > 0.2  # 3.2 m is nearly the end of the ramp
        else:
            ramp_pitch = 0  # flat terrain goal 6 m
            is_fallen = math.fabs(roll) > 0.3 or math.fabs(
                pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs(
                    yaw) > 0.2 or math.fabs(
                        pos[0]) >= self.env_goal or math.fabs(
                            pos[1]) > 0.2 or math.fabs(pos[2]) > 0.17

        if is_fallen:
            self.save_time("Finish")  # episode finish
            self.save_termination([
                math.fabs(roll),
                math.fabs(pitch),
                math.fabs(yaw),
                math.fabs(pos[0]),
                math.fabs(pos[1])
            ])
            rospy.loginfo(
                "ROll: %s : %s , Pitch: %s : %s , Yaw: %s : %s , X: %s : %s , Y: %s : %s , Z: %s : %s",
                math.fabs(roll),
                math.fabs(roll) > 0.3, math.fabs(pitch),
                math.fabs(pitch) > 0.3 + math.fabs(ramp_pitch), math.fabs(yaw),
                math.fabs(yaw) > 0.2, math.fabs(pos[0]),
                math.fabs(pos[0]) >= 3.2, math.fabs(pos[1]),
                math.fabs(pos[1]) >= 0.2, math.fabs(pos[2]),
                math.fabs(pos[2]) > 0.17)
            # SAVING
            self.save_torques(self.torques_step, self.frames)
            self.save_baze_zaxis(self.base_zaxis, self.frames)
            self.save_baze_x_y(self.base_x_y, self.frames)
            self.save_euler_angles(self.euler_angles, self.frames)
            self.save_angular_velocities(self.euler_rates, self.frames)
            self.save_sim_time(self.sim_t, self.frames)
            self.torques_step = []
            self.euler_angles = []
            self.euler_rates = []
            self.base_zaxis = []
            self.base_x_y = []
            self.sim_t = []
            episode_start = False
        return is_fallen

    # _______________Reward function__________________________________________________________________________________
    def reward(self):

        nan_check = self.GetNanToeCheck()
        if any(nan_check):
            rospy.loginfo(
                "---Angles Inverse Kinematics: NaN %s, try again---r=-1",
                nan_check)
            reward = -1
        else:
            if self.ramp_available:
                ramp_pitch = self.GetRampInclination(
                )  # pitch angle of the ramp
            else:
                ramp_pitch = 0
            current_base_position, _, _, _ = self.GetLaelapsBaseInfo()

            # Reward forward motion
            forward_reward = math.fabs(
                current_base_position[0] / math.cos(ramp_pitch)) - math.fabs(
                    self.last_base_position[0] / math.cos(ramp_pitch))
            # rospy.loginfo("current_base_position[0] %s self.last_base_position[0] %s forward_reward %s",math.fabs(current_base_position[0]),math.fabs(self.last_base_position[0]),forward_reward )

            # Penalize drifting
            drift_reward = math.fabs(current_base_position[1]) - math.fabs(
                self.last_base_position[1])
            # rospy.loginfo("current_base_position[1] %s self.last_base_position[1] %s drift_reward %s",math.fabs(current_base_position[1]),math.fabs(self.last_base_position[1]),drift_reward )

            self.last_base_position = current_base_position

            reward = (self.distance_weight * forward_reward -
                      self.drift_weight * drift_reward)
        return reward

    # ______________Saving values and ploting tensorboard_____________________________________________-

    def tensorboardwriter(self, folder, use_tensorboardEnV,
                          use_tensorboardAlg):
        self.enable_tensorboard = use_tensorboardEnV
        if use_tensorboardEnV or use_tensorboardAlg:
            self.writer = SummaryWriter(folder)
        else:
            self.writer = []
        return self.writer

    def savingpath(self, folder, saving_option):
        global saving_path
        self.saving_option = saving_option
        saving_path = folder

    def plot_tensorboard(self, title, var1, var2):
        if self.enable_tensorboard:
            self.writer.add_scalar(title, var1, var2)

    def save_actions(self, action, step_number):
        global episode_savingpath
        if self.saving_option:
            action = np.array(action, dtype=np.float64)
            np.save(str(episode_savingpath) + "/" + str(step_number), action)

    def save_torques(self, torques, step_number):
        global episode_savingpath
        if self.saving_option:
            torques = np.array(torques, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/Torques_" + str(step_number),
                torques)

    def save_euler_angles(self, euler_angles, step_number):
        global episode_savingpath
        if self.saving_option:
            euler_angles = np.array(euler_angles, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/EulerAngles_" + str(step_number),
                euler_angles)

    def save_angular_velocities(self, angular_velocities, step_number):
        global episode_savingpath
        if self.saving_option:
            angular_velocities = np.array(angular_velocities, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/AngularVelocities_" +
                str(step_number), angular_velocities)

    def save_baze_zaxis(self, base_zaxis, step_number):
        global episode_savingpath
        if self.saving_option:
            base_zaxis = np.array(base_zaxis, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/BaseZaxis_" + str(step_number),
                base_zaxis)

    def save_baze_x_y(self, base_x_y, step_number):
        global episode_savingpath
        if self.saving_option:
            base_x_y = np.array(base_x_y, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/BaseX_Y" + str(step_number),
                base_x_y)

    def save_sim_time(self, sim_t, step_number):
        global episode_savingpath
        if self.saving_option:
            sim_t = np.array(sim_t, dtype=np.float64)
            np.save(
                str(episode_savingpath) + "/Sim_t" + str(step_number), sim_t)

    def save_time(self, start_or_finish):
        global episode_savingpath
        if self.saving_option:
            episode_time = str(time.ctime(time.time()))
            np.save(
                str(episode_savingpath) + "/Episode_" + str(start_or_finish),
                episode_time)

    def save_termination(self, termination_critiria):
        global episode_savingpath
        if self.saving_option:
            np.save(
                str(episode_savingpath) + "/termination", termination_critiria)

    def close(self):
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")
def position_reset_test():
    """
    Test of position accuracy and reset system.
    """
    position = 0.0
    position = float(sys.argv[1])

    rospy.init_node('debug_test_node', anonymous=True, log_level=rospy.WARN)
    rospy.logwarn("[position=" + str(position) + "]")
    wait_time = 0.1
    controllers_object = ControllersConnection(namespace="cartpole_v0")

    debug_object = MoveCartClass()

    gazebo = GazeboConnection()
    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()
    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("START CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("SET init pose...")
    debug_object.set_init_pose()
    rospy.loginfo("WAIT FOR GOING TO INIT POSE")
    time.sleep(wait_time)

    raw_input("Start Movement...PRESS KEY")
    i = 0
    wait_times_m = []
    while not rospy.is_shutdown():
        pos_x = [position]
        debug_object.move_joints(pos_x)
        delta_time = debug_object.wait_until_base_is_in_pos(position)
        wait_times_m = numpy.append(wait_times_m, [delta_time])
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_pos(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])
        i += 1
        if i > 10:
            average_time = numpy.average(wait_times_m)
            rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]")
            break

    rospy.loginfo("END CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK BEFORE RESET")
    debug_object.get_clock_time()

    # Reset Sim
    raw_input("END Start Movement...PRESS KEY")
    rospy.loginfo("SETTING INITIAL POSE TO AVOID")
    debug_object.set_init_pose()
    time.sleep(wait_time * 2.0)
    rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo(
        "We deactivate gravity to check any reasidual effect of reseting the simulation"
    )
    gazebo.change_gravity(0.0, 0.0, 0.0)

    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()

    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("We reactivating gravity...")
    gazebo.change_gravity(0.0, 0.0, -9.81)
    rospy.loginfo("END")
Beispiel #4
0
class OldMouseEnv(gym.Env):

    def __init__(self):



        number_actions = rospy.get_param('/mouse/n_actions')
        self.action_space = spaces.Discrete(number_actions)

        self._seed()

        #get configuration parameters
        self.init_roll_vel = rospy.get_param('/mouse/init_roll_vel')

        # Actions
        self.roll_speed_fixed_value = rospy.get_param('/mouse/roll_speed_fixed_value')
        self.roll_speed_increment_value = rospy.get_param('/mouse/roll_speed_increment_value')

        self.start_point = Point()
        self.start_point.x = rospy.get_param("/mouse/init_mouse_pose/x")
        self.start_point.y = rospy.get_param("/mouse/init_mouse_pose/y")
        self.start_point.z = rospy.get_param("/mouse/init_mouse_pose/z")

        # Done
        self.max_pitch_angle = rospy.get_param('/mouse/max_pitch_angle')

        # Rewards
        self.move_distance_reward_weight = rospy.get_param("/mouse/move_distance_reward_weight")
        self.y_linear_speed_reward_weight = rospy.get_param("/mouse/y_linear_speed_reward_weight")
        self.y_axis_angle_reward_weight = rospy.get_param("/mouse/y_axis_angle_reward_weight")
        self.end_episode_points = rospy.get_param("/mouse/end_episode_points")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()
        self.controllers_list = ['joint_state_controller',
                         'right_front_diff_drive_controller'
                         ]
        self.controllers_object = ControllersConnection(namespace="mouse",
                                                        controllers_list=self.controllers_list)


        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()

        rospy.Subscriber("/mouse/joint_states", JointState, self.joints_callback)
        rospy.Subscriber("/mouse/odom", Odometry, self.odom_callback)


        self._roll_vel_pub = rospy.Publisher('/mouse/right_front_diff_drive_controller/command', Float64, queue_size=1)
        self.check_publishers_connection()

        self.gazebo.pauseSim()


    def _seed(self, seed=None): #overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):#overriden function

        self.gazebo.unpauseSim()
        self.set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self.compute_reward(obs, done)
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs, reward, done, info


    def reset(self):

        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.set_init_pose()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        self.controllers_object.reset_controllers()
        self.check_all_sensors_ready()
        self.gazebo.pauseSim()
        self.init_env_variables()
        obs = self._get_obs()
        simplified_obs = self.convert_obs_to_state(obs)

        return simplified_obs


    def init_env_variables(self):
        """
        Inits variables needed to be initialised each time we reset at the start
        of an episode.
        :return:
        """
        self.total_distance_moved = 0.0
        self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point)
        self.roll_turn_speed = rospy.get_param('/mouse/init_roll_vel')

    def _is_done(self, observations):

        pitch_angle = observations[3]

        if abs(pitch_angle) > self.max_pitch_angle:
            rospy.logerr("WRONG Mouse Pitch Orientation==>" + str(pitch_angle))
            done = True
        else:
            rospy.logdebug("Mouse Pitch Orientation Ok==>" + str(pitch_angle))
            done = False

        return done

    def set_action(self, action):

        # We convert the actions to speed movements to send to the parent class MouseSingleDiskEnv
        if action == 0:# Move Speed Wheel Forwards
            self.roll_turn_speed = self.roll_speed_fixed_value
        elif action == 1:# Move Speed Wheel Backwards
            self.roll_turn_speed = self.roll_speed_fixed_value
        elif action == 2:# Stop Speed Wheel
            self.roll_turn_speed = 0.0
        elif action == 3:# Increment Speed
            self.roll_turn_speed += self.roll_speed_increment_value
        elif action == 4:# Decrement Speed
            self.roll_turn_speed -= self.roll_speed_increment_value

        # We clamp Values to maximum
        rospy.logdebug("roll_turn_speed before clamp=="+str(self.roll_turn_speed))
        self.roll_turn_speed = numpy.clip(self.roll_turn_speed,
                                          -1*self.roll_speed_fixed_value,
                                          self.roll_speed_fixed_value)
        rospy.logdebug("roll_turn_speed after clamp==" + str(self.roll_turn_speed))

        # We tell the OneDiskMouse to spin the RollDisk at the selected speed
        self.move_joints(self.roll_turn_speed)


    def _get_obs(self):
        """
        Here we define what sensor data defines our robots observations
        To know which Variables we have acces to, we need to read the
        MouseSingleDiskEnv API DOCS
        :return:
        """

        # We get the orientation of the mouse in RPY
        roll, pitch, yaw = self.get_orientation_euler()

        # We get the distance from the origin
        y_distance = self.get_y_dir_distance_from_start_point(self.start_point)

        # We get the current speed of the Roll Disk
        current_disk_roll_vel = self.get_roll_velocity()

        # We get the linear speed in the y axis
        y_linear_speed = self.get_y_linear_speed()

        mouse_observations = [
            round(current_disk_roll_vel, 0),
            round(y_distance, 1),
            round(roll, 1),
            round(pitch, 1),
            round(y_linear_speed,1),
            round(yaw, 1),
        ]

        return mouse_observations


    def get_orientation_euler(self):
        # We convert from quaternions to euler
        orientation_list = [self.odom.pose.pose.orientation.x,
                            self.odom.pose.pose.orientation.y,
                            self.odom.pose.pose.orientation.z,
                            self.odom.pose.pose.orientation.w]

        roll, pitch, yaw = euler_from_quaternion(orientation_list)
        return roll, pitch, yaw

    def get_roll_velocity(self):
        # We get the current joint roll velocity
        roll_vel = self.joints.velocity[0]
        return roll_vel

    def get_y_linear_speed(self):
        # We get the current joint roll velocity
        y_linear_speed = self.odom.twist.twist.linear.y
        return y_linear_speed


    def get_y_dir_distance_from_start_point(self, start_point):
        """
        Calculates the distance from the given point and the current position
        given by odometry. In this case the increase or decrease in y.
        :param start_point:
        :return:
        """
        y_dist_dir = self.odom.pose.pose.position.y - start_point.y

        return y_dist_dir

    def compute_reward(self, observations, done):

        if not done:

            y_distance_now = observations[1]
            delta_distance = y_distance_now - self.current_y_distance
            rospy.logdebug("y_distance_now=" + str(y_distance_now)+", current_y_distance=" + str(self.current_y_distance))
            rospy.logdebug("delta_distance=" + str(delta_distance))
            reward_distance = delta_distance * self.move_distance_reward_weight
            self.current_y_distance = y_distance_now

            y_linear_speed = observations[4]
            rospy.logdebug("y_linear_speed=" + str(y_linear_speed))
            reward_y_axis_speed = y_linear_speed * self.y_linear_speed_reward_weight

            # Negative Reward for yaw different from zero.
            yaw_angle = observations[5]
            rospy.logdebug("yaw_angle=" + str(yaw_angle))
            # Worst yaw is 90 and 270 degrees, best 0 and 180. We use sin function for giving reward.
            sin_yaw_angle = math.sin(yaw_angle)
            rospy.logdebug("sin_yaw_angle=" + str(sin_yaw_angle))
            reward_y_axis_angle = -1 * abs(sin_yaw_angle) * self.y_axis_angle_reward_weight


            # We are not intereseted in decimals of the reward, doesnt give any advatage.
            reward = round(reward_distance, 0) + round(reward_y_axis_speed, 0) + round(reward_y_axis_angle, 0)
            rospy.logdebug("reward_distance=" + str(reward_distance))
            rospy.logdebug("reward_y_axis_speed=" + str(reward_y_axis_speed))
            rospy.logdebug("reward_y_axis_angle=" + str(reward_y_axis_angle))
            rospy.logdebug("reward=" + str(reward))
        else:
            reward = -1*self.end_episode_points

        return reward


    def joints_callback(self, data):
        self.joints = data

    def odom_callback(self, data):
        self.odom = data


    def check_all_sensors_ready(self):
        self.check_joint_states_ready()
        self.check_odom_ready()
        rospy.logdebug("ALL SENSORS READY")

    def check_joint_states_ready(self):
        self.joints = None
        while self.joints is None and not rospy.is_shutdown():
            try:
                self.joints = rospy.wait_for_message("/mouse/joint_states", JointState, timeout=1.0)
                rospy.logdebug("Current mouse/joint_states READY=>" + str(self.joints))

            except:
                rospy.logerr("Current mouse/joint_states not ready yet, retrying for getting joint_states")
        return self.joints

    def check_odom_ready(self):
        self.odom = None
        while self.odom is None and not rospy.is_shutdown():
            try:
                self.odom = rospy.wait_for_message("/mouse/odom", Odometry, timeout=1.0)
                rospy.logdebug("Current /mouse/odom READY=>" + str(self.odom))

            except:
                rospy.logerr("Current /mouse/odom not ready yet, retrying for getting odom")

        return self.odom

    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._roll_vel_pub.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.logdebug("No susbribers to _roll_vel_pub yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.logdebug("_base_pub Publisher Connected")

        rospy.logdebug("All Publishers READY")

    def move_joints(self, roll_speed):
        joint_speed_value = Float64()
        joint_speed_value.data = roll_speed
        rospy.logdebug("Single Disk Roll Velocity>>" + str(joint_speed_value))
        self._roll_vel_pub.publish(joint_speed_value)
        self.wait_until_roll_is_in_vel(joint_speed_value.data)

    def wait_until_roll_is_in_vel(self, velocity):

        rate = rospy.Rate(10)
        start_wait_time = rospy.get_rostime().to_sec()
        end_wait_time = 0.0
        epsilon = 0.1
        v_plus = velocity + epsilon
        v_minus = velocity - epsilon
        while not rospy.is_shutdown():
            joint_data = self.check_joint_states_ready()
            roll_vel = joint_data.velocity[0]
            rospy.logdebug("VEL=" + str(roll_vel) + ", ?RANGE=[" + str(v_minus) + ","+str(v_plus)+"]")
            are_close = (roll_vel <= v_plus) and (roll_vel > v_minus)
            if are_close:
                rospy.logdebug("Reached Velocity!")
                end_wait_time = rospy.get_rostime().to_sec()
                break
            rospy.logdebug("Not there yet, keep waiting...")
            rate.sleep()
        delta_time = end_wait_time- start_wait_time
        rospy.logdebug("[Wait Time=" + str(delta_time)+"]")
        return delta_time


    def set_init_pose(self):
        """Sets the Robot in its init pose
        """
        self.move_joints(self.init_roll_vel)

        return True


    def convert_obs_to_state(self,observations):
        """
        Converts the observations used for reward and so on to the essentials for the robot state
        In this case we only need the orientation of the mouse and the speed of the disc.
        The distance doesnt condition at all the actions
        """
        disk_roll_vel = observations[0]
        y_linear_speed = observations[4]
        yaw_angle = observations[5]

        state_converted = [disk_roll_vel, y_linear_speed, yaw_angle]

        return state_converted
def velocity_test():
    """
    Test of the time it takes to reach a certain speed
    """

    speed_value = 10.0
    wait_time = 0.1

    rospy.init_node('cartpole_speed_test_node',
                    anonymous=True,
                    log_level=rospy.WARN)

    # Controllers Info
    controllers_list = [
        'joint_state_controller',
        'pole_joint_velocity_controller',
        'foot_joint_velocity_controller',
    ]
    robot_name_space = "cartpole_v0"
    controllers_object = ControllersConnection(
        namespace=robot_name_space, controllers_list=controllers_list)

    debug_object = MoveCartClass()

    start_init_physics_parameters = True
    reset_world_or_sim = "SIMULATION"
    gazebo = GazeboConnection(start_init_physics_parameters,
                              reset_world_or_sim)

    rospy.loginfo("RESETING SIMULATION")
    gazebo.pauseSim()
    gazebo.resetSim()
    gazebo.unpauseSim()
    rospy.loginfo("CLOCK AFTER RESET")
    debug_object.get_clock_time()
    rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
    controllers_object.reset_controllers()
    rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
    debug_object.get_clock_time()
    rospy.loginfo("START CHECKING SENSOR DATA")
    debug_object.check_all_systems_ready()
    rospy.loginfo("SET init pose...")
    debug_object.set_init_pose()
    rospy.loginfo("WAIT FOR GOING TO INIT POSE")
    time.sleep(wait_time)

    raw_input("Start Movement...PRESS KEY")
    i = 0
    wait_times_m = []
    while not rospy.is_shutdown():
        vel_x = [speed_value]

        rospy.loginfo("Moving RIGHT...")
        debug_object.move_joints([speed_value])
        delta_time = debug_object.wait_until_base_is_in_vel([speed_value])
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        rospy.loginfo("Moving STOP...")
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_vel(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        raw_input("Start Movement...PRESS KEY")

        rospy.loginfo("Moving LEFT...")
        debug_object.move_joints([-1 * speed_value])
        delta_time = debug_object.wait_until_base_is_in_vel([-1 * speed_value])
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        rospy.loginfo("Moving STOP...")
        debug_object.move_joints([0.0])
        delta_time = debug_object.wait_until_base_is_in_vel(0.0)
        wait_times_m = numpy.append(wait_times_m, [delta_time])

        raw_input("Start Movement...PRESS KEY")

        i += 1

        if i > 10:
            average_time = numpy.average(wait_times_m)
            rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]")
            break
Beispiel #6
0
class RobotGazeboEnv(gym.Env):
    def __init__(self,
                 robot_name_space,
                 controllers_list,
                 reset_controls,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION"):

        # To reset Simulations
        rospy.logdebug("START init RobotGazeboEnv")
        self.gazebo = GazeboConnection(start_init_physics_parameters,
                                       reset_world_or_sim)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        self.reward_pub = rospy.Publisher('/openai/reward',
                                          RLExperimentInfo,
                                          queue_size=1)
        rospy.logdebug("END init RobotGazeboEnv")

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        rospy.logdebug("START STEP OpenAIROS")

        self.gazebo.unpauseSim()
        self._set_action(action)
        self.gazebo.pauseSim()
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self.cumulated_episode_reward += reward

        rospy.logdebug("END STEP OpenAIROS")

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting RobotGazeboEnvironment")
        self._reset_sim()
        self._init_env_variables()
        self._update_episode()
        obs = self._get_obs()
        rospy.logdebug("END Reseting RobotGazeboEnvironment")
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")

    def _update_episode(self):
        """
        Publishes the cumulated reward of the episode and 
        increases the episode number by one.
        :return:
        """
        rospy.logwarn("PUBLISHING REWARD...")
        self._publish_reward_topic(self.cumulated_episode_reward,
                                   self.episode_num)
        rospy.logwarn("PUBLISHING REWARD...DONE=" +
                      str(self.cumulated_episode_reward) + ",EP=" +
                      str(self.episode_num))

        self.episode_num += 1
        self.cumulated_episode_reward = 0

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------
    def _reset_sim(self):
        """Resets a simulation
        """
        rospy.logdebug("RESET SIM START")
        if self.reset_controls:
            rospy.logdebug("RESET CONTROLLERS")
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        else:
            rospy.logwarn("DONT RESET CONTROLLERS")
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        rospy.logdebug("RESET SIM END")
        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
Beispiel #7
0
class CartPole3DEnv(gym.Env):

    def __init__(self):
        self.publishers_array = []
        self._base_pub = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command', Float64, queue_size=1)
        # self._pole_pub = rospy.Publisher('/cartpole_v0/pole_joint_velocity_controller/command', Float64, queue_size=1)
        self.publishers_array.append(self._base_pub)
        # self.publishers_array.append(self._pole_pub)

        self.action_space = spaces.Discrete(2) #l,r,L,R,nothing
        self._seed()

        #get configuration parameters
        self.min_pole_angle = rospy.get_param('/cart_pole_3d/min_angle')
        self.max_pole_angle = rospy.get_param('/cart_pole_3d/max_angle')
        self.max_base_velocity = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value')
        self.min_base_position = -rospy.get_param('/cart_pole_3d/max_distance')
        self.max_base_position = rospy.get_param('/cart_pole_3d/max_distance')
        self.pos_step = rospy.get_param('/cart_pole_3d/pos_step')
        self.running_step = rospy.get_param('/cart_pole_3d/running_step')
        self.init_pos = rospy.get_param('/cart_pole_3d/init_cart_vel')
        self.wait_time = rospy.get_param('/cart_pole_3d/wait_time')

        self.init_internal_vars(self.init_pos)

        rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback)

        # stablishes connection with simulator
        self.controllers_list = ['cart_joint_velocity_controller']


        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection(namespace="cart_pole_3d",
                                    controllers_list=self.controllers_list)

    def init_internal_vars(self, init_pos_value):
        self.pos = [init_pos_value]
        self.joints = None

    #always returns the current state of the joints
    def joints_callback(self, data):
        self.joints = data

    def get_clock_time(self):
        self.clock_time = None
        while self.clock_time is None and not rospy.is_shutdown():
            try:
                self.clock_time = rospy.wait_for_message("/clock", Clock, timeout=1.0)
                rospy.logdebug("Current clock_time READY=>" + str(self.clock_time))
            except:
                rospy.logdebug("Current clock_time not ready yet, retrying for getting Current clock_time")
        return self.clock_time

    def _seed(self, seed=None): #overriden function
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):#overriden function

        # Take action
        if action == 0: #LEFT
            rospy.logwarn("GO LEFT...")
            self.pos[0] -= self.pos_step
        elif action == 1: #RIGHT
            rospy.logwarn("GO RIGHT...")
            self.pos[0] += self.pos_step

        rospy.logwarn("MOVING TO POS=="+str(self.pos))

        # 1st: unpause simulation
        rospy.logdebug("Unpause SIM...")
        self.gazebo.unpauseSim()

        self.move_joints(self.pos)
        rospy.logdebug("Wait for some time to execute movement, time="+str(self.running_step))
        rospy.sleep(self.running_step) #wait for some time
        rospy.logdebug("DONE Wait for some time to execute movement, time=" + str(self.running_step))

        # 3rd: pause simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # 4th: get the observation
        observation, done, state = self.observation_checks()

        # 5th: get the reward
        if not done:
            step_reward = 0
            obs_reward = self.get_reward_for_observations(state)
            rospy.loginfo("Reward Values: Time="+str(step_reward)+",Obs="+str(obs_reward))
            reward = step_reward + int(obs_reward)
            rospy.loginfo("TOT Reward=" + str(reward))
        else:
            reward = -2000000

        now = rospy.get_rostime()

        # TODO: Seems that gym version 0.8 doesnt support this very well.
        return observation, reward, done, {}

    def _reset(self):
        rospy.loginfo("We UNPause the simulation to start having topic data")
        self.gazebo.unpauseSim()

        rospy.loginfo("CLOCK BEFORE RESET")
        self.get_clock_time()

        rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA")
        # self.check_all_systems_ready()
        self.check_joint_states_ready()
        rospy.loginfo("SETTING INITIAL POSE TO AVOID")
        self.set_init_pose()
        time.sleep(self.wait_time * 2.0)

        #rospy.logdebug("We deactivate gravity to check any reasidual effect of reseting the simulation")
        #self.gazebo.change_gravity(0.0, 0.0, 0.0)

        rospy.loginfo("RESETING SIMULATION")
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()
        rospy.loginfo("CLOCK AFTER RESET")
        self.get_clock_time()

        rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK")
        self.controllers_object.reset_controllers()
        rospy.loginfo("AFTER RESET CHECKING SENSOR DATA")
        # self.check_all_systems_ready()
        self.check_joint_states_ready()
        rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
        self.get_clock_time()
        #rospy.logdebug("We reactivating gravity...")
        #self.gazebo.change_gravity(0.0, 0.0, -9.81)
        rospy.loginfo("END")

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        # get the last observation got when paused, generated by the callbakc or the check_all_systems_ready
        # Depends on who was last
        observation, _, state = self.observation_checks()

        return observation


    '''
    UTILITY CODE FOLLOWS HERE
    '''

    def observation_checks(self):
        done = False
        data = self.joints
        #       base_postion            base_velocity           pole angle              pole velocity
        state = [round(data.position[1],1), round(data.velocity[1],1), round(data.position[0],3), round(data.velocity[0],3)]
        #   pole angle  pole velocity
        rospy.loginfo("BASEPOSITION=="+str(state[0]))
        rospy.loginfo("POLE ANGLE==" + str(state[2]))
        if (self.min_base_position >= state[0] or state[0] >= self.max_base_position): #check if the base is still within the ranges of (-2, 2)
            rospy.logerr("Base Ouside Limits==>min="+str(self.min_base_position)+",pos="+str(state[0])+",max="+str(self.max_base_position))
            done = True
        if (self.min_pole_angle >= state[2] or state[2] >= self.max_pole_angle): #check if pole has toppled over
            rospy.logerr(
                "Pole Angle Ouside Limits==>min=" + str(self.min_pole_angle) + ",pos=" + str(state[2]) + ",max=" + str(
                    self.max_pole_angle))
            done = True

        observations = [state[2]]

        return observations, done, state

    def get_reward_for_observations(self, state):
        """
        Gives more points for staying upright, gets data from given observations to avoid
        having different data than other previous functions
        :return:reward
        """

        pole_angle = state[2]
        pole_vel = state[3]

        rospy.logwarn("pole_angle for reward==>" + str(pole_angle))
        delta = 0.7 - abs(pole_angle)
        reward_pole_angle = math.exp(delta*10)

        # If we are moving to the left and the pole is falling left is Bad
        rospy.logwarn("pole_vel==>" + str(pole_vel))
        pole_vel_sign = numpy.sign(pole_vel)
        pole_angle_sign = numpy.sign(pole_angle)
        rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign))
        rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign))

        # We want inverted signs for the speeds. We multiply by -1 to make minus positive.
        # global_sign + = GOOD, global_sign - = BAD
        base_reward = 500
        if pole_vel != 0:
            global_sign = pole_angle_sign * pole_vel_sign * -1
            reward_for_efective_movement = base_reward * global_sign
        else:
            # Is a particular case. If it doesnt move then its good also
            reward_for_efective_movement = base_reward

        reward = reward_pole_angle + reward_for_efective_movement

        rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement))
        return reward


    def check_publishers_connection(self):
        """
        Checks that all the publishers are working
        :return:
        """
        rate = rospy.Rate(10)  # 10hz
        while (self._base_pub.get_num_connections() == 0 and not rospy.is_shutdown()):
            rospy.loginfo("No susbribers to _base_pub yet so we wait and try again")
            try:
                rate.sleep()
            except rospy.ROSInterruptException:
                # This is to avoid error when world is rested, time when backwards.
                pass
        rospy.loginfo("_base_pub Publisher Connected")

    def check_all_systems_ready(self, init=True):
        self.base_position = None
        while self.base_position is None and not rospy.is_shutdown():
            try:
                self.base_position = rospy.Subscriber("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                rospy.logdebug("Current /cart_pole_3d/joint_states READY=>"+str(self.base_position))
                if init:
                    # We Check all the sensors are in their initial values
                    positions_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.position)
                    velocity_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.velocity)
                    efforts_ok = all(abs(i) <= 1.0e-01 for i in self.base_position.effort)
                    base_data_ok = positions_ok and velocity_ok and efforts_ok
                    rospy.logdebug("Checking Init Values Ok=>" + str(base_data_ok))
            except:
                rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")
        rospy.logdebug("ALL SYSTEMS READY")

    def check_joint_states_ready(self):
        self.base_position = None
        while self.base_position is None and not rospy.is_shutdown():
            try:
                self.base_position = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0)
                # check response from this topic for 1 second. if don't received respone, it mean not ready.
                # Assure data channels are working perfectly.
                rospy.logdebug("Current /cart_pole_3d/joint_states READY=>" + str(self.base_position))

            except:
                rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states")


    def move_joints(self, joints_array):
        joint_value = Float64()
        joint_value.data = joints_array[0]
        rospy.logdebug("Single Base JointsPos>>"+str(joint_value))
        self._base_pub.publish(joint_value)


    def set_init_pose(self):
        """
        Sets joints to initial position [0,0,0]
        :return:
        """
        self.check_publishers_connection()
        # Reset Internal pos variable
        self.init_internal_vars(self.init_pos)
        self.move_joints(self.pos)
Beispiel #8
0
class BaseEnv(gym.Env):
    def __init__(self,
                 robot_name_space,
                 controllers_list,
                 reset_controls,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION"):

        rospy.logdebug("START init BaseEnv")
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        self.seed()

        rospy.logdebug("END init BaseEnv")

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        rospy.logdebug("START STEP ROS")

        self._set_action(action)
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)

        rospy.logdebug("END STEP ROS")

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting BaseEnv")
        self._init_env_variables()
        obs = self._get_obs()
        rospy.logdebug("END Reseting BaseEnv")
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing BaseEnv")
        rospy.signal_shutdown("Closing BaseEnv")

    def _reset_sim(self):
        """Resets a simulation
        """
        rospy.logdebug("RESET SIM START")
        if self.reset_controls:
            rospy.logdebug("RESET CONTROLLERS")
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
        else:
            rospy.logwarn("DONT RESET CONTROLLERS")
            self._check_all_systems_ready()
            self._set_init_pose()
            self._check_all_systems_ready()

        rospy.logdebug("RESET SIM END")
        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
Beispiel #9
0
    point.positions.append(joint_positions[i + 6])
    point.velocities.append(0)
    point.accelerations.append(0)
    point.effort.append(0)
jointCmd_finger.points.append(point)
rate = rospy.Rate(100)
count = 0

while (count < 50):
    pub.publish(jointCmd)
    pubf.publish(jointCmd_finger)
    count = count + 1
    rate.sleep()
time.sleep(1)

time.sleep(.1)

rospy.wait_for_service('/gazebo/pause_physics')
pause_gazebo()
rospy.wait_for_service('/gazebo/reset_simulation')
reset_sim()
rospy.wait_for_service('/gazebo/set_model_configuration')
print(model_config)
set_model_srv(model_config)
pub.publish(jointCmd)
pubf.publish(jointCmd_finger)
time.sleep(0.2)
rospy.wait_for_service('/gazebo/unpause_physics')
unpause_gazebo()
controllers_object.reset_controllers()