コード例 #1
0
class GazeboModularScara3DOFv3Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000 # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self.choose_robot = 0

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness



        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [BASE, BASE_MOTOR,
                      SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
                      SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2,
                      SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3,
                      EE_LINK]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)

        # Seed the environment
        self._seed()

        # self.max_steps_episode = 1000
    def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True):
            self.slowness = slowness
            self.slowness_unit = slowness_unit
            self.reset_jnts = reset_jnts
            print("slowness: ", self.slowness)
            print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit))
            print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts))

    def randomizeTargetPositions(self):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485,0.3883636), np.random.uniform(-0.1746508,0.1701576), 0.3746]) # boundry box of the first half H-ROS letters with +-0.01 offset
        EE_POS_TGT_RANDOM2 = np.asmatrix([np.random.uniform(0.2852485,0.3883636), np.random.uniform(-0.1746508,0.1701576), 0.3746]) # boundry box of the H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1
        ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTarget(self):
        print("calling randomize target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeMultipleTargets(self):
        print("calling randomize multiple targets")

        EE_POS_TGT_1 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.0985179, 0.3746]) # center of H right
        EE_POS_TGT_3 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_4 = np.asmatrix([0.3355224, 0.0344309, 0.3746]) # center of O left
        EE_POS_TGT_5 = np.asmatrix([0.3013209, 0.1647450, 0.3746]) # S top right
        EE_POS_TGT_6 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S midlle

        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2
        ee_pos_tgt_3 = EE_POS_TGT_3
        ee_pos_tgt_4 = EE_POS_TGT_4
        ee_pos_tgt_5 = EE_POS_TGT_5
        ee_pos_tgt_6 = EE_POS_TGT_6

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        target3 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_3, ee_rot_tgt).T)
        target4 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_4, ee_rot_tgt).T)
        target5 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_5, ee_rot_tgt).T)
        target6 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_6, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        all_targets = [target1,target2,target3,target4,target5,target6]
        self.realgoal = random.choice(all_targets)
        # self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeCorrect realgoal: ", self.realgoal)


    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg =  message

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty");
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(map(lambda x,y: x in y, message.joint_names,
                    [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(message.actual.positions) # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3),
                                                       ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message, self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(self.scara_chain,
                                        self.environment['link_names'],
                                        last_observations[:self.scara_chain.getNrOfJoints()],
                                        base_link=self.environment['link_names'][0],
                                        end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
                                                              trans,
                                                              rot).T)
            ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
                                                                   self.environment['end_effector_points'],
                                                                   rot)
            ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
                                                           self.environment['end_effector_points'],
                                                           rot,
                                                           last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

            return np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator+=1
        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases
        #
        # # Calculate if the env has been solved
        # done = bool(abs(self.reward_dist) < 0.005)

        # reward as in the enviroment for 4DOF commented out
        self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
            self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # print("reward: ", self.reward)
        # print("rmse_func: ", self.rmse_func(ee_points))

        # # enviroment V2 reward
        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases

        # Calculate if the env has been solved
        done = (bool(abs(self.reward_dist) < 0.005))  or (self.iterator > self.max_episode_steps)

        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Execute "action"
        # if rclpy.ok(): # ROS 2 code
        self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))

        #TODO: wait until action gets executed
        # When adding this the algorithm does not converge
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()
        # print("in step, action: ", action[:3])
        # print("in step, observation: ", self.ob[:3])

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def _reset(self):
        # """
        # Reset the agent for a particular experiment condition.
        # """
        # # Resets the state of the environment and returns an initial observation.
        # rospy.wait_for_service('/gazebo/reset_simulation')
        # try:
        #     #reset_proxy.call()
        #     self.reset_proxy()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/reset_simulation service call failed")
        #.,
        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Go to initial position and wait until it arrives there
        # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause the simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")
        self.iterator = 0
        if self.reset_jnts is True:
            self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif(self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness/1000000000)) # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
コード例 #2
0
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000 # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self.choose_robot = 0

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness



        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [BASE, BASE_MOTOR,
                      SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
                      SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2,
                      SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3,
                      EE_LINK]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)

        # Seed the environment
        self._seed()
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self,
                                      "ModularScara3_Obstacles_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._collision_msg = None  ###
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342])  #center of S
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness

        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base'
        # BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        # SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        # SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        # SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            SCARA_MOTOR1, SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER,
            EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        # URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf"
        #URDF_PATH = "/media/raid/RL/catkin_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf"
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if (prefix_path == None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState,
                                          self.collision_callback)  ##
        # self._sub_normals = rospy.Subscriber('/gazebo_normals', Vector3, self.normals_callback, queue_size=1, buff_size=2**24) ##
        #self._sub_coll = rospy.Subscriber('/gazebo_contacts',Bool, collision_callback) ##

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        # print("self.environment['link_names'][0]:", self.environment['link_names'][0])
        # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1])
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        print("nr of joints: ", self.scara_chain.getNrOfJoints())

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                               DeleteModel)

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

        #self.addObstacle()

        # Seed the environment
        self._seed()
class GazeboModularScara3DOFStaticObstaclev1Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self,
                                      "ModularScara3_Obstacles_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._collision_msg = None  ###
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342])  #center of S
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness

        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base'
        # BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        # SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        # SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        # SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            SCARA_MOTOR1, SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER,
            EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        # URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf"
        #URDF_PATH = "/media/raid/RL/catkin_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf"
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if (prefix_path == None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState,
                                          self.collision_callback)  ##
        # self._sub_normals = rospy.Subscriber('/gazebo_normals', Vector3, self.normals_callback, queue_size=1, buff_size=2**24) ##
        #self._sub_coll = rospy.Subscriber('/gazebo_contacts',Bool, collision_callback) ##

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        # print("self.environment['link_names'][0]:", self.environment['link_names'][0])
        # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1])
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        print("nr of joints: ", self.scara_chain.getNrOfJoints())

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                               DeleteModel)

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

        #self.addObstacle()

        # Seed the environment
        self._seed()

        # self.max_steps_episode = 1000
    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=False):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))

    def randomizeObstacle(self):
        print("calling randomize obstacle")
        #EE_POS_TGT_1 = np.asmatrix([0.3239543, 0.0083323, 0.3746]) #center of R
        #EE_POS_TGT_1 = np.asmatrix([0.29557, -0.0422738, 0.3746]) # R top left
        EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571,
                                    0.26342])  #center of S 0.26342
        #EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S center

        #EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.26342])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        #self.realgoal = target1 if np.random.uniform() < 0.5 else target2

        if np.random.uniform() < 0.5:
            self.addObstacle()
            #self.realgoal = target2
            self.realgoal = target1
            print("\n\nOBSTACLE\n\n")
        else:
            # self.removeObstacle()
            self.addObstacle()
            self.realgoal = target1
            print("\n\nNO OBSTACLE\n\n")

        print("randomizeObstacle realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H

    #def randomizeCorrect(self):
    def randomizeTarget(self):
        print("calling randomize correct")
        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.3746])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.3746])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H

    def collision_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """
        # print("\ncollision: ", self._collision_msg)
        self._collision_msg = message

    def normals_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """

        self._normals_msg = message

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            # print("current_ee_tgt: ", current_ee_tgt)
            # print("ee_points:", ee_points)
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases
        #
        # # Calculate if the env has been solved
        # done = bool(abs(self.reward_dist) < 0.005)

        # reward as in the enviroment for 4DOF commented out
        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)]) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() +
                    3)])  # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # print("\n STEP -> collision_msg", self._collision_msg)
        # print("\n STEP -> collision_msg.data", self._collision_msg.data)
        # If there is a collision, penalize reward considerably
        # if self._collision_msg.data == True:
        #     self.reward = self.reward - 1000
        #     # print("\n\n\n Penalized reward after COLLISION", self._collision_msg)

        # print("string is empty")

        # print("reward: ", self.reward)
        # print("rmse_func: ", self.rmse_func(ee_points))

        # # enviroment V2 reward
        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases

        # Calculate if the env has been solved
        done = (bool(abs(self.reward_dist) < 0.005)) or (
            self.iterator > self.max_episode_steps)

        # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))

        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # # Execute "action"
        # if self._collision_msg.collision2_name == '' or self._collision_msg.collision1_name == '':
        #     self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
        # else:
        #     # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
        #     # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
        #     self.reward = self.reward - 5
        #     print("\n\n\n Penalized reward after COLLISION", self._collision_msg)
        #     time.sleep(1)
        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))

        #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf
        if self._collision_msg.collision2_name == '' or self._collision_msg.collision1_name == '':
            self._pub.publish(
                self.get_trajectory_message(
                    action[:self.scara_chain.getNrOfJoints()]))
        else:
            #check if depth is existing
            if self._collision_msg.depths is None:
                print("self._collision_msg.depths[0] is empty")
            # print("Contact detected.", self._collision_msg.collision1_name, ", ", self._collision_msg.collision2_name)
            else:
                contact_depths = 1000 * self._collision_msg.depths[
                    0]  #make them in mm otherwise we have too many decimals
                print("\ncontact_depths", contact_depths)
                self._pub.publish(
                    self.get_trajectory_message(
                        action[:self.scara_chain.getNrOfJoints()]))
                #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time).
                # changing to abs value of depths
                if contact_depths > 0 and abs(contact_depths) < 0.0001:
                    self.reward = self.reward - (abs(contact_depths)) / 2
                elif contact_depths > 0 and abs(contact_depths) > 0.0001:
                    self.reward = self.reward - abs(contact_depths)
                else:
                    print("self._collision_msg.depths[0]:", contact_depths)

        #TODO: wait until action gets executed
        # When adding this the algorithm does not converge
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # print("in step, action: ", action[:3])
        # print("in step, observation: ", self.ob[:3])

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def addObstacle(self):
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        #   <material name=\"green\">\
        #     <color rgba=\"0 0.8 .8 1\"/>\
        #   </material>\

        try:
            # #NORMAL CYLINDER
            model_xml = "<?xml version=\"1.0\"?> \
                                    <robot name=\"myfirst\"> \
                                      <link name=\"world\"> \
                                      </link>\
                                      <link name=\"cylinder0\">\
                                        <visual>\
                                          <geometry>\
                                            <box size=\".1 .05 1\"/>\
                                          </geometry>\
                                          <origin xyz=\"0 0 0\"/>\
                                          <material name=\"blue\">\
                                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                                          </material>\
                                        </visual>\
                                        <inertial>\
                                          <mass value=\"5.0\"/>\
                                          <inertia ixx=\"1\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\
                                        </inertial>\
                                        <collision>\
                                          <geometry>\
                                            <box size=\".2 .05 1\"/>\
                                          </geometry>\
                                          <surface>\
                                            <contact>\
                                              <ode>\
                                                <min_depth>\"0.001\"</min_depth>\
                                                <kp>\"1e6\"</kp>\
                                                <kd>\"1.000000\"</kd>\
                                              </ode>\
                                             </contact>\
                                          </surface>\
                                       </collision>\
                                      </link>\
                                      <joint name=\"world_to_base\" type=\"fixed\"> \
                                        <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\
                                        <parent link=\"world\"/>\
                                        <child link=\"cylinder0\"/>\
                                      </joint>\
                                      <gazebo reference=\"cylinder0\">\
                                        <material>Gazebo/Blue</material>\
                                      </gazebo>\
                                    </robot>"

            robot_namespace = ""
            pose = Pose()
            number = random.randint(0, 4)
            #pose.position.x = 0.15;#
            pose.position.x = 0.3
            #
            pose.position.y = 0.07
            #
            pose.position.z = 0.05
            pose.orientation.x = 0
            pose.orientation.y = 0
            pose.orientation.z = 0
            pose.orientation.w = 0

            #BOX
            # model_xml = "<?xml version=\"1.0\"?> \
            #              <robot name=\"myfirst\"> \
            #                   <link name=\"world\"> \
            #                   </link>\
            #                   <link name=\"box0\">\
            #                     <visual>\
            #                       <geometry>\
            #                         <box>\
            #                             <size>0.2 0.2 1.0</size>\
            #                         </box>\
            #                       </geometry>\
            #                       <origin xyz=\"0 0 0\"/>\
            #                       <material name=\"blue\">\
            #                           <ambient>0.5 0.5 1.0 0.1</ambient>\
            #                           <diffuse>0.5 0.5 1.0 0.1</diffuse>\
            #                       </material>\
            #                     </visual>\
            #                     <inertial>\
            #                       <mass value=\"5.0\"/>\
            #                       <inertia>\
            #                         <ixx>1.0</ixx>\
            #                         <ixy>0</ixy>\
            #                         <ixz>0</ixz>\
            #                         <iyy>1.0</iyy>\
            #                         <iyz>0</iyz>\
            #                         <izz>1.0</izz>\
            #                       </inertia>\
            #                     </inertial>\
            #                     <collision>\
            #                       <geometry>\
            #                         <box>\
            #                             <size>0.2 0.2 1.0</size>\
            #                         </box>\
            #                       </geometry>\
            #                     </collision>\
            #                   </link>\
            #                   <joint name=\"world_to_base\" type=\"fixed\"> \
            #                     <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\
            #                     <parent link=\"world\"/>\
            #                     <child link=\"box0\"/>\
            #                   </joint>\
            #                   <gazebo reference=\"box0\">\
            #                     <material>Gazebo/Blue</material>\
            #                   </gazebo>\
            #                 </robot>"

            # model_xml = "<?xml version=\"1.0\"?> \
            #             <robot name=\"myfirst\"> \
            #               <link name=\"world\"> \
            #               </link>\
            #               <link name=\"cylinder0\">\
            #                 <visual>\
            #                   <geometry>\
            #                     <sphere radius=\"0.01\"/>\
            #                   </geometry>\
            #                   <origin xyz=\"0 0 0\"/>\
            #                   <material name=\"rojotransparente\">\
            #                       <ambient>0.5 0.5 1.0 0.1</ambient>\
            #                       <diffuse>0.5 0.5 1.0 0.1</diffuse>\
            #                   </material>\
            #                 </visual>\
            #                 <inertial>\
            #                   <mass value=\"5.0\"/>\
            #                   <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
            #                 </inertial>\
            #               </link>\
            #               <joint name=\"world_to_base\" type=\"fixed\"> \
            #                 <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
            #                 <parent link=\"world\"/>\
            #                 <child link=\"cylinder0\"/>\
            #               </joint>\
            #               <gazebo reference=\"cylinder0\">\
            #                 <material>Gazebo/GreenTransparent</material>\
            #               </gazebo>\
            #             </robot>"

            # robot_namespace = ""
            # pose = Pose()
            # number = random.randint(0, 4)
            # # if number == 1:
            # #     pose.position.x = 0.25;
            # #     pose.position.y = 0.07;
            # # if number == 2:
            # #     pose.position.x = 0.25;
            # #     pose.position.y = -0.07;
            # # if number == 3:
            # #     pose.position.x = 0.23;
            # #     pose.position.y = -0.087;
            # # if number == 4:
            # #     pose.position.x = 0.23;
            # #     pose.position.y = 0.087;
            #
            # pose.position.x = 0.25;#
            # pose.position.y = 0.07;#
            #
            # pose.position.z = 0.0;
            # pose.orientation.x = 0;
            # pose.orientation.y= 0;
            # pose.orientation.z = 0;
            # pose.orientation.w = 0;
            reference_frame = ""
            if number > -1:
                self.add_model(model_name="obstacle",
                               model_xml=model_xml,
                               robot_namespace=robot_namespace,
                               initial_pose=pose,
                               reference_frame="")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def removeObstacle(self):
        rospy.wait_for_service('/gazebo/delete_model')
        try:
            self.remove_model(model_name="obstacle")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def _reset(self):
        # """
        # Reset the agent for a particular experiment condition.
        # """
        self.iterator = 0
        msg = self.get_trajectory_message(
            self.environment['reset_conditions']['initial_positions'])
        msg.points[0].time_from_start.secs = 0
        msg.points[0].time_from_start.nsecs = 10000
        self._pub.publish(msg)

        # time.sleep(int(self.environment['slowness'])) # using seconds
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            time.sleep(int(self.slowness))
        elif (self.slowness_unit == 'nsec'):
            time.sleep(int(self.slowness / 1000000000))  # using nanoseconds
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
コード例 #5
0
ファイル: mara_real.py プロジェクト: ri-ceres/gym-gazebo2
    def __init__(self):
        """
        Initialize the MARA environemnt
        """
        # Manage command line args
        args = ut_generic.getArgsParserMARA().parse_args()
        self.real_speed = args.real_speed
        self.velocity = args.velocity
        # Set the path of the corresponding URDF file
        URDF_PATH = get_prefix_path(
            "mara_description"
        ) + "/share/mara_description/urdf/mara_robot_gripper_140.urdf"

        # Create the node after the new ROS_DOMAIN_ID is set in generate_launch_description()
        rclpy.init(args=None)
        self.node = rclpy.create_node(self.__class__.__name__)

        # class variables
        self._observation_msg = None
        self.obs = None
        self.action_space = None
        self.target_position = None
        self.target_orientation = None
        self.max_episode_steps = 1024
        self.iterator = 0
        self.reset_jnts = True

        #############################
        #   Environment hyperparams
        #############################
        # Target, where should the agent reach
        self.target_position = np.asarray([-0.40028, 0.095615,
                                           0.72466])  # close to the table
        self.target_orientation = np.asarray(
            [0., 0.7071068, 0.7071068, 0.])  # arrow looking down [w, x, y, z]
        # self.target_position = np.asarray([-0.386752, -0.000756, 1.40557]) # easy point
        # self.target_orientation = np.asarray([-0.4958324, 0.5041332, 0.5041331, -0.4958324]) # arrow looking opposite to MARA [w, x, y, z]

        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])

        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])

        # # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'
        MOTOR6_JOINT = 'motor6'
        EE_LINK = 'ee_link'

        # Set constants for links
        WORLD = 'world'
        TABLE = 'table'
        BASE = 'base_link'
        BASE_ROBOT = 'base_robot'
        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'

        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT, MOTOR6_JOINT
        ]
        LINK_NAMES = [
            WORLD, TABLE, BASE, BASE_ROBOT, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK,
            MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK,
            MARA_MOTOR6_LINK, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)

        # Initialize target end effector position
        self.environment = {
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'end_effector_points': EE_POINTS,
        }

        # Subscribe to the appropriate topics, taking into account the particular robot
        self._pub = self.node.create_publisher(
            JointTrajectory,
            JOINT_PUBLISHER,
            qos_profile=qos_profile_sensor_data)
        self._sub = self.node.create_subscription(
            JointTrajectoryControllerState,
            JOINT_SUBSCRIBER,
            self.observation_callback,
            qos_profile=qos_profile_sensor_data)
        self.reset_sim = self.node.create_client(Empty, '/reset_simulation')

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = tree_urdf.treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.mara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        self.num_joints = self.mara_chain.getNrOfJoints()
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.mara_chain)

        self.obs_dim = self.num_joints + 10

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        low = -np.pi * np.ones(self.num_joints)
        high = np.pi * np.ones(self.num_joints)
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # Seed the environment
        self.seed()
コード例 #6
0
    def __init__(self):

        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._camera_image = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746])
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0, 0, 0])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.
        # # Set the number of seconds per step of a sample.
        # TIMESTEP = 0.01  # Typically 0.01.
        # # Set the number of timesteps per sample.
        # STEP_COUNT = 100  # Typically 100.
        # # Set the number of samples per condition.
        # SAMPLE_COUNT = 5  # Typically 5.
        # # Set the number of trajectory iterations to collect.
        # ITERATIONS = 20  # Typically 10.
        # How much time does it take to execute the trajectory (in seconds)
        slowness = 2

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        #############################
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if (prefix_path == None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.environment = {
            # 'dt': TIMESTEP,
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image,
                                           self.camera_callback)

        #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback)

        # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions
        # rclpy.init(args=None)
        # self.ros2_node = rclpy.create_node('robot_ai_node')
        # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER)
        # # self._callbacks = partial(self.observation_callback, robot_id=0)
        # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data)
        # # self._time_lock = threading.RLock()

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = 9  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        # Seed the environment
        self._seed()

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")
コード例 #7
0
class Box3DOFv1Env(gazebo_env.GazeboEnv):
    def __init__(self):

        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._camera_image = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746])
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0, 0, 0])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.
        # # Set the number of seconds per step of a sample.
        # TIMESTEP = 0.01  # Typically 0.01.
        # # Set the number of timesteps per sample.
        # STEP_COUNT = 100  # Typically 100.
        # # Set the number of samples per condition.
        # SAMPLE_COUNT = 5  # Typically 5.
        # # Set the number of trajectory iterations to collect.
        # ITERATIONS = 20  # Typically 10.
        # How much time does it take to execute the trajectory (in seconds)
        slowness = 2

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        #############################
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if (prefix_path == None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.environment = {
            # 'dt': TIMESTEP,
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image,
                                           self.camera_callback)

        #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback)

        # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions
        # rclpy.init(args=None)
        # self.ros2_node = rclpy.create_node('robot_ai_node')
        # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER)
        # # self._callbacks = partial(self.observation_callback, robot_id=0)
        # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data)
        # # self._time_lock = threading.RLock()

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = 9  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        # Seed the environment
        self._seed()

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message
        # print("!!!!!!!!!Received an observation")

    def camera_callback(self, message):
        """
        Callback method for the subscriber of Image
        """
        self._camera_image = message
        print("\ncamera_image.height", self._camera_image.height)
        print("\ncamera_image.width", self._camera_image.width)

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        target.time_from_start.secs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(3)
        # Initialize a joint array for the present 6 joint angles.
        angles = JntArray(3)
        # Construct the joint array from the most recent joint angles.
        for i in range(3):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, 3)
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def addObstacle(self):
        print("\nADD OBSTACLE")
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        try:
            # #NORMAL BOX
            model_xml = "<?xml version=\"1.0\"?> \
                                <robot name=\"myfirst\"> \
                                <link name=\"world\"> \
                                </link>\
                                <link name=\"cylinder0\">\
                                <visual>\
                                    <geometry>\
                                        <box size=\".05 .05 .05\"/>\
                                        </geometry>\
                                    <origin xyz=\"0 0 0\"/>\
                                    <material name=\"red\">\
                                        <ambient>0.5 0.5 1.0 0.1</ambient>\
                                        <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                                    </material>\
                                </visual>\
                                <inertial>\
                                    <mass value=\"10.\"/>\
                                    <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\
                                </inertial>\
                                <collision>\
                                    <geometry>\
                                        <box size=\".05 .05 .05\"/>\
                                    </geometry>\
                                </collision>\
                                </link>\
                                <joint name=\"world_to_base\" type=\"fixed\"> \
                                    <origin xyz=\"0 0 0.3\" rpy=\"0 0 0\"/>\
                                    <parent link=\"world\"/>\
                                    <child link=\"cylinder0\"/>\
                                </joint>\
                                <gazebo reference=\"cylinder0\">\
                                    <material>Gazebo/Red</material>\
                                </gazebo>\
                        </robot>"

            robot_namespace = ""
            pose = Pose()
            number = random.randint(0, 4)
            #pose.position.x = 0.15;#
            pose.position.x = 0.3
            #
            pose.position.y = 0.07
            #
            pose.position.z = 0.05
            pose.orientation.x = 0
            pose.orientation.y = 0
            pose.orientation.z = 0
            pose.orientation.w = 0

            reference_frame = ""

            self.add_model(model_name="obstacle",
                           model_xml=model_xml,
                           robot_namespace=robot_namespace,
                           initial_pose=pose,
                           reference_frame="")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        # # ROS 2, Acquire the lock to prevent the subscriber thread from
        # # updating times or observation messages.
        # self._time_lock.acquire(True)
        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:3],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - observation
            - reward
            - done (status)
            - dictionary (#TODO clarify)
        """
        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Execute "action"
        # if rclpy.ok(): # ROS 2 code
        self._pub.publish(self.get_trajectory_message(action[:3]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Pause simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            #resp_pause = pause.call()
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        self.reward_dist = -self.rmse_func(self.ob[2])
        if abs(self.reward_dist) < 0.005:
            # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist)
            self.reward = 100 + self.reward_dist
        else:
            # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist)
            self.reward = self.reward_dist

        # print("reward: ", self.reward)

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005)

        # self._time_lock.release() # ROS 2 code

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        # Resets the state of the environment and returns an initial observation.
        rospy.wait_for_service('/gazebo/reset_simulation')
        try:
            #reset_proxy.call()
            self.reset_proxy()
        except (rospy.ServiceException) as e:
            print("/gazebo/reset_simulation service call failed")

        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Pause the simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Return the corresponding observation
        return self.ob