Esempio n. 1
0
    def _get_jacobians(self, state):
        """Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """

        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(6)

        # Initialize a joint array for the present 6 joint angles.
        angles = JntArray(6)

        # Construct the joint array from the most recent joint angles.
        for i in range(6):
            angles[i] = state[i]

        # Initialize a tree structure from the robot urdf.
        # Note that the xacro of the urdf is updated by hand.
        # Then the urdf must be compiled.
        _, ur_tree = treeFromFile(self._hyperparams['tree_path'])

        # Retrieve a chain structure between the base and the start of the end effector.
        ur_chain = ur_tree.getChain(self._hyperparams['link_names'][0],
                                    self._hyperparams['link_names'][6])

        # Initialize a KDL Jacobian solver from the chain.
        jac_solver = ChainJntToJacSolver(ur_chain)

        # Update the jacobian by solving for the given angles.
        jac_solver.JntToJac(angles, jacobian)

        # Initialize a numpy array to store the Jacobian.
        J = np.zeros((6, 6))
        for i in range(jacobian.rows()):
            for j in range(jacobian.columns()):
                J[i, j] = jacobian[i, j]

        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J[0:3, :]

        return ee_jacobians
Esempio n. 2
0
class GazeboMARATopOrientCollisionv0Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self._collision_msg = None

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach

        # EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466]) # alex2
        EE_POS_TGT = np.asmatrix([-0.580238, -0.179591,
                                  0.72466])  # rubik touching the bar
        # EE_ROT_TGT = np.asmatrix([[-0.00128296,  0.9999805 ,  0.00611158],
        #                            [ 0.00231397, -0.0061086 ,  0.99997867],
        #                            [ 0.9999965 ,  0.00129708, -0.00230609]])
        # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis
        # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981])
        # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis

        # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H
        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])

        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])
        # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287],
                                  [0.51531424, 0.85207952, 0.09171542],
                                  [-0.31601302, 0.08944959,
                                   0.94452874]])  # original orientation
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.])
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'
        MOTOR6_JOINT = 'motor6'

        # Set constants for links
        TABLE = 'table'

        BASE = 'base_link'

        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'

        # EE_LINK = 'ee_link'
        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT, MOTOR6_JOINT
        ]
        LINK_NAMES = [
            TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK,
            MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = rospkg.RosPack().get_path(
            "mara_description") + "/urdf/mara_demo_camera_top.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt
        self.target_orientation = ee_rot_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'ee_point_tgt_orient': self.target_orientation,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory)
        self._sub = rospy.Subscriber(JOINT_SUBSCRIBER,
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState,
                                          self.collision_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        recently also added quaternion to the obs, which has dimension=4
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 7  #6 hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # high = np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                               DeleteModel)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.reset_world = rospy.ServiceProxy('/gazebo/reset_world', Empty)

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = EE_POS_TGT[0, 2]

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

        # Seed the environment
        # Seed the environment
        self._seed()

    # def collision_callback(self, message):
    #     """
    #     Callback method for the subscriber of Collision data
    #     """
    #
    #
    #     if "puzzle_ball_joints::cubie" not in message.collision1_name and "puzzle_ball_joints::cubie" not in message.collision2_name:
    #         if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and  "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name:
    #             if "puzzle_ball_joints::cubie" not in message.collision1_name or  "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
    #                 if message.collision1_name and message.collision2_name:
    #                     self._collision_msg =  message
    #                     # print("self._collision_msg: ", self._collision_msg)

    def collision_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """
        self._collision_msg = None
        if message.collision1_name is not message.collision2_name:
            if "puzzle_ball_joints::cubie" not in message.collision1_name and "puzzle_ball_joints::cubie" not in message.collision2_name:
                if "puzzle_ball_joints::cubie" not in message.collision1_name or "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
                    if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name:
                        self._collision_msg = message

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def randomizeTargetPositions(self):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the first half H-ROS letters with +-0.01 offset
        EE_POS_TGT_RANDOM2 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1
        ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTarget(self):
        print("calling randomize target")

        EE_POS_TGT_1 = np.asmatrix([-0.189383, -0.123176, 0.894476])  # point 1
        EE_POS_TGT_2 = np.asmatrix([-0.359236, 0.0297278, 0.760402])  # point 2
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeMultipleTargets(self):
        print("calling randomize multiple target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.2868])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.2868])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here
            # current_quaternion = quaternion_from_matrix(rotation_matrix)
            # tgt_quartenion = quaternion_from_matrix(self.target_orientation)

            current_quaternion = quat.from_rotation_matrix(rotation_matrix)
            # print("current_quaternion: ", current_quaternion)
            tgt_quartenion = quat.from_rotation_matrix(self.target_orientation)

            # A  = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T

            #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0]

            # this is wrong!!!! Substraction should be replaced by: quat_error = current_quaternion * tgt_quartenion.conjugate()
            # quat_error = current_quaternion - tgt_quartenion

            quat_error = current_quaternion * tgt_quartenion.conjugate()
            # convert quat to np arrays
            quat_error = quat.as_float_array(quat_error)

            # RK:  revisit this later, we only take one angle difference here!
            angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.))
            # print("quat error: ", quat_error)
            # print("quat_error[..., 0]: ", quat_error[..., 0])
            # print("quat_error: ",quat_error)
            # print("angle_diff: ", angle_diff)
            # print("self.realgoal: ", self.realgoal)
            # print("curr quat: ", current_quaternion)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(angle_diff, -1),
                          np.reshape(ee_velocities, -1), ]
            # print("quat_error: ", quat_error)
            # print("ee_points:", ee_points)
            # print("angle_diff: ", angle_diff)
            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(angle_diff, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action, prevac):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        # rmse_trans = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])
        # rmse_orient = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])
        # # print("rmse_orient: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])
        # self.reward_dist = -rmse_trans
        # self.reward_orient = -rmse_orient
        #                     # + self.ob[(self.scara_chain.getNrOfJoints()+4)] )
        #
        # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        # if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
        #     self.reward_final_dist = 1 + self.reward_dist # Make the reward increase as the distance decreases
        #     print("Reward Pose is: ", self.reward_final_dist)
        # else:
        #     self.reward_final_dist = self.reward_dist

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])
        # careful we have degrees now so we scale with
        orientation_scale = 0.01

        # print("orientation reward: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+4)])
        self.reward_orient = -orientation_scale * abs(
            self.ob[self.scara_chain.getNrOfJoints() +
                    3:(self.scara_chain.getNrOfJoints() + 4)]
        )  #self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+4)])*0.1
        # print("self.reward_orient: ", self.reward_orient)
        # print(self.reward_orient)
        # print("self.reward_orient: ", self.reward_orient)

        #scale here the orientation because it should not be the main bias of the reward, position should be
        collided = False

        if self._collision_msg is not None and self._collision_msg.collision1_name and self._collision_msg.collision2_name:
            # print("\ncollision detected: ", self._collision_msg)
            print("Collision detected")
            collided = True
            self.reward = (self.reward_dist + self.reward_orient) * 4.0
            # print("Reward collision is: ", self.reward)

            # Resets the state of the environment and returns an initial observation.
            # we should avoid this --> huge bottleneck
            rospy.wait_for_service('/gazebo/reset_simulation')
            try:
                self.reset_proxy()
                # go to the previous state before colliding
                self._pub.publish(
                    self.get_trajectory_message(
                        prevac[:self.scara_chain.getNrOfJoints()]))
            except (rospy.ServiceException) as e:
                print("/gazebo/reset_simulation service call failed")
                # self.goToInit()
                # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_world', Empty)

        else:
            # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
            # here is the distance block
            if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() + 3)]) < 0.01):
                self.reward = 1 + self.reward_dist  # Make the reward increase as the distance decreases
                print("Reward is: ", self.reward)
            else:
                self.reward = self.reward_dist
                # print("Reward is (minus): ", self.reward)

            # take into account the orientation
            if (abs(self.reward_orient) < 0.005):
                self.reward = self.reward + (1 + self.reward_orient)
                print("Reward orientation is: ", self.reward)
            else:
                self.reward = self.reward + self.reward_orient  # * self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])
                # print("Reward orientation is (minus): ", self.reward)

            if self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() + 3)]) < 0.01 and abs(
                        self.reward_orient) < 0.005:
                self.reward = 10 * (2 + self.reward_dist + self.reward_orient)
                print("Reward hit the target, and is: ", self.reward)

        # self.reward =self.reward - abs(self.ob[(self.scara_chain.getNrOfJoints()+4)])
        # Calculate if the env has been solved
        done = bool(((abs(self.reward_dist) < 0.01) and
                     (abs(self.reward_orient)) < 0.005)
                    or (self.iterator > self.max_episode_steps))

        if not collided:
            # Execute "action"
            self._pub.publish(
                self.get_trajectory_message(
                    action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, collided, {}

    def goToInit(self):
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions'][
                'initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action - now_action, float(np.inf))
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
                time.sleep(3)

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """

        self.iterator = 0

        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
class GazeboModularScara4DOFv3Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara4_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 100  # this is specific parameter for the acktr algorithm. Not used in ppo1, trpo...

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121,
                                  0.4868])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([2.0, -2.0, -2.0, 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        slowness = 10000000  # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'

        # Set constants for links
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'

        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'

        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'

        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'

        SCARA_MOTOR4 = 'scara_e1_motor4'
        SCARA_INSIDE_MOTOR4 = 'scara_e1_motor4_inside'
        SCARA_SUPPORT_MOTOR4 = 'scara_e1_motor4_support'
        SCARA_BAR_MOTOR4 = 'scara_e1_bar4'
        SCARA_FIXBAR_MOTOR4 = 'scara_e1_fixbar4'

        SCARA_RANGEFINDER = 'scara_e1_rangefinder'

        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, SCARA_BAR_MOTOR3,
            SCARA_FIXBAR_MOTOR3, SCARA_MOTOR4, SCARA_INSIDE_MOTOR4,
            SCARA_SUPPORT_MOTOR4, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_4joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.environment = {
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # # Gazebo specific services to start/stop its behavior and
        # # facilitate the overall RL environment
        # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)

        # Seed the environment
        self._seed()

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        # target.time_from_start.secs = self.environment['slowness']
        target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present 6 joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases
        #
        # # Calculate if the env has been solved
        # done = bool(abs(self.reward_dist) < 0.005)

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)]) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() +
                    3)])  # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # print("reward: ", self.reward)
        # print("rmse_func: ", self.rmse_func(ee_points))

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005)

        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Execute "action"
        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))
        #TODO: wait until action gets executed
        # When adding this the algorithm does not converge
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def goToInit(self):
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions'][
                'initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action - now_action, float(np.inf))
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
                time.sleep(3)

    def _reset(self):
        # """
        # Reset the agent for a particular experiment condition.
        # """
        # # Resets the state of the environment and returns an initial observation.
        # rospy.wait_for_service('/gazebo/reset_simulation')
        # try:
        #     #reset_proxy.call()
        #     self.reset_proxy()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/reset_simulation service call failed")
        #
        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause the simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # self.goToInit()

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 4
0
class GazeboModularScara3DOFStaticObstaclev1Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        #gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_v0.launch")
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_urdf_simplified_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._collision_msg = None ###
        self._torque_msg = None ###
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000 # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'

        self.mod = 100
        self.init_torque = 0.0
        self.init_torque_array = np.zeros(3)
        self.ee_point_matrix = []
        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342]) # 26342 center of S
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness



        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        MOTOR3_TORQUE_SUBSCRIBER = '/motor3_torque'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base'
        # BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        # SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        # SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        # SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [SCARA_MOTOR1,SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER, EE_LINK]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if(prefix_path==None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf"
        #URDF_PATH = prefix_path + "/urdf/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback)

        # self._sub_coll = rospy.Subscriber('/gazebo_contacts',ContactState, self.collision_callback) ##

        self._sub_torque = rospy.Subscriber('/motor3_torque',WrenchStamped, self.torque_callback) ##

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        # print("self.environment['link_names'][0]:", self.environment['link_names'][0])
        # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1])
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        print("nr of joints: ", self.scara_chain.getNrOfJoints())

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel)

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"
        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0,0];
        pose.position.y = EE_POS_TGT[0,1];
        pose.position.z = 0.055#EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0;
        pose.orientation.y= 0;
        pose.orientation.z = 0;
        pose.orientation.w = 0;
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                        model_xml=model_xml,
                        robot_namespace="",
                        initial_pose=pose,
                        reference_frame="")

        #self.addObstacle()

        # Seed the environment
        self._seed()

        # self.max_steps_episode = 1000
    def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=False):
            self.slowness = slowness
            self.slowness_unit = slowness_unit
            print("slowness: ", self.slowness)
            print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit))


    def randomizeObstacle(self):
        print("calling randomize obstacle")
        #EE_POS_TGT_1 = np.asmatrix([0.3239543, 0.0083323, 0.3746]) #center of R
        #EE_POS_TGT_1 = np.asmatrix([0.29557, -0.0422738, 0.3746]) # R top left
        EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S 0.26342
        #EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S center

        #EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.26342]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        #self.realgoal = target1 if np.random.uniform() < 0.5 else target2


        if np.random.uniform() < 0.5:
             self.addObstacle()
             #self.realgoal = target2
             self.realgoal = target1
             print("\n\nOBSTACLE\n\n")
        else:
             # self.removeObstacle()
             self.addObstacle()
             self.realgoal = target1
             print("\n\nNO OBSTACLE\n\n")

        print("randomizeObstacle realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H

    # RK: do we need this function?
    def setPenalizationMod(self, pen_mod):
        if pen_mod == 1:
            self.mod = 100
        elif pen_mod == 2:
            self.mod = 200
        elif pen_mod == 3:
            self.mod = 300
        elif pen_mod == 4:
            self.mod = 400
        elif pen_mod == 5:
            self.mod = 500
        elif pen_mod == 6:
            self.mod = 600
        elif pen_mod == 7:
            self.mod = 700
        elif pen_mod == 8:
            self.mod = 800


    #def randomizeCorrect(self):
    def randomizeTarget(self):
        print("calling randomize correct")
        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H

    def collision_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """
        # print("\ncollision: ", self._collision_msg)
        self._collision_msg =  message
    def torque_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """
        self._torque_msg =  message
# <<<<<<< HEAD
#         if self.init_torque == 0.0:
#             self.init_torque_array = [message.wrench.torque.x, message.wrench.torque.y, message.wrench.torque.z]
#             self.init_torque = np.linalg.norm(self.init_torque_array)
#             print("Init force at motor 3 is: ", self.init_torque)
#         # print("\nTorque: ", self._torque_msg)
# =======
#         #print("\nTorque: ", self._torque_msg)
# >>>>>>> 5534657d7384ea47ae3ebbf2ddc1ff9b68d9a0be


    def normals_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """

        self._normals_msg =  message


    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg =  message

    def get_trajectory_message(self, action,set_const_vel=False, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        if set_const_vel:
            target.velocities = 0.0005*np.ones(self.scara_chain.getNrOfJoints())
            target.effort = 0.005*np.ones(self.scara_chain.getNrOfJoints())
            target.time_from_start.secs = 4
        else:
            # These times determine the speed at which the robot moves:
            # it tries to reach the specified target position in 'slowness' time.
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                target.time_from_start.secs = self.slowness
            elif (self.slowness_unit == 'nsec'):
                target.time_from_start.nsecs = self.slowness
            else:
                print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty");
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(map(lambda x,y: x in y, message.joint_names,
                    [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(message.actual.positions) # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3),
                                                       ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message, self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(self.scara_chain,
                                        self.environment['link_names'],
                                        last_observations[:self.scara_chain.getNrOfJoints()],
                                        base_link=self.environment['link_names'][0],
                                        end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            self.current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
                                                              trans,
                                                              rot).T)
            ee_points = self.current_ee_tgt - self.realgoal#self.environment['ee_points_tgt']

            # #calculate more sophisticated distance measurement
            # self.ee_point_matrix.append(np.array(ee_points))
            # # print("\nself.ee_point_matrix: ", self.ee_point_matrix)
            # if len(self.ee_point_matrix)>3:
            #     # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None)
            #     print("len(self.ee_point_matrix): ",len(self.ee_point_matrix), "self.ee_point_matrix: ", self.ee_point_matrix)
            # if len(self.ee_point_matrix)>10:
            #     self.ee_point_matrix = []



            # print("current_ee_tgt: ", current_ee_tgt)
            # print("ee_points:", ee_points)
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
                                                                   self.environment['end_effector_points'],
                                                                   rot)
            ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
                                                           self.environment['end_effector_points'],
                                                           rot,
                                                           last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

            return np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse
    def log_torque_func(self, torque_diff):
        """
        Computes the Log of the Eucledian Distance Error between current and desired end-effector position
        """
        log_dist = np.log(torque_diff, dtype=np.float32)
        log_dist[log_dist == -np.inf] = 0.0
        log_dist = np.nan_to_num(log_dist)

        return log_dist

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator+=1

        self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
            self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        # elif(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.08):
        #     self.reward = 0.5 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases
        #     print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None)
        # print("d_hamming: ", d)

        # Calculate if the env has been solved
# <<<<<<< HEAD
#         done = (bool(abs(self.reward_dist) < 0.008)) or (self.iterator > self.max_episode_steps)
#
#
#
#
#         #calculate the linnorm of the force at the motor3
#         #1. put current torques in an array
#         curr_torque_array = [self._torque_msg.wrench.torque.x, self._torque_msg.wrench.torque.y, self._torque_msg.wrench.torque.z]
#         torque_motor3  = np.linalg.norm(curr_torque_array)
#
#         diff_torques = np.subtract(curr_torque_array, self.init_torque_array)
#
#         log_torques = self.log_torque_func(diff_torques)
#         # print("log_torques: ", log_torques)
#         self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
#
#         if self.init_torque > 0 and torque_motor3 > self.init_torque:
#             # print("collision detected with torque at motor3: ", torque_motor3, "init_torque: ", self.init_torque)
#             proportion_torque = np.divide(torque_motor3,self.init_torque)
#             # print("diff between init force and collision: ", proportion_force)
#             if proportion_torque > 2.0: #and proportion_force <= 100.0:
#                 self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()], set_const_vel=True))
#                 self.reward = self.reward - np.divide((proportion_torque + 0.5*np.linalg.norm(log_torques)),1000)
#                 # print("c1 self.reward:", self.reward)
#                 if(proportion_torque <5 and (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.08)):
#                     self.reward = 0.5 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) - np.divide((proportion_torque + 0.5*np.linalg.norm(log_torques)),1000)
#                     print("c2 self.reward:", self.reward)
#
#
#
#         print("at end reward: ", self.reward)
#
#         # print("force_motor3: ", force_motor3)
#
#         #  #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf
#         # if abs(self._torque_msg.wrench.force.x) > 100:
#         #     print("self.self._torque_msg.force.x>100: ", self._torque_msg.wrench.force.x)
#
#                 # # contact_depths = 100 * self._collision_msg.depths[0] #make them in mm otherwise we have too many decimals
#                 # print("\self.self._torque_msg", self.self._torque_msg, "reward: ", self.reward)
#                 # # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
#                 # #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time).
#                 # # changing to abs value of depths
#                 # if contact_depths > 0 and  abs(contact_depths) < 0.0001:
#                 #     self.reward = self.reward - (abs(contact_depths))/2
#                 #     print("\n cond1, contact_depths", contact_depths, "reward: ", self.reward)
#                 # elif abs(contact_depths) > 0.0001: #contact_depths > 0 and
#                 #     self.reward = self.reward - abs(contact_depths)
#                 #     print("\n cond2, contact_depths", contact_depths, "reward: ", self.reward)
#                 # else:
#                 #     # self.reward = -100
#                 #     print("cond3, self._collision_msg.depths[0]:", contact_depths)
# =======
        done = (bool(abs(self.reward_dist) < 0.005)) or (self.iterator > self.max_episode_steps)

        #  COLLISION DEPTH BASED REWARD SHAPING
        #  #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf
        # if self._collision_msg.collision2_name or self._collision_msg.collision1_name: #else
        #         contact_depths = 100 * self._collision_msg.depths[0] #make them in mm otherwise we have too many decimals
        #         print("\ncontact_depths", contact_depths, "reward: ", self.reward)
        #         # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
        #         #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time).
        #         # changing to abs value of depths
        #         if contact_depths > 0 and  abs(contact_depths) < 0.0001:
        #             self.reward = self.reward - (abs(contact_depths))/2
        #             print("\n cond1, contact_depths", contact_depths, "reward: ", self.reward)
        #         elif abs(contact_depths) > 0.0001: #contact_depths > 0 and
        #             self.reward = self.reward - abs(contact_depths)
        #             print("\n cond2, contact_depths", contact_depths, "reward: ", self.reward)
        #         else:
        #             # self.reward = -100
        #             print("cond3, self._collision_msg.depths[0]:", contact_depths)

        #  TORQUE BASED REWARD SHAPING
        if self._torque_msg is not None:
            torque_x = self._torque_msg.wrench.torque.x
            torque_y = self._torque_msg.wrench.torque.y
            torque_z = self._torque_msg.wrench.torque.z
            torque_value = np.sqrt(torque_x * torque_x + torque_y * torque_y + torque_z * torque_z) / 1000
            print("\n Torque value:", torque_value)
            if torque_value > 0.01 and torque_value < 0.1:
                    self.reward = self.reward - (abs(torque_value)) / 2
                    print("\n Reward, torque penalization", self.reward)
            elif torque_value > 0.01 and torque_value > 0.1:
                    self.reward = self.reward - (abs(torque_value))
                    print("\n Reward, torque penalization", self.reward)
# >>>>>>> 5534657d7384ea47ae3ebbf2ddc1ff9b68d9a0be


        # # Take an observation
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        ee_point = self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)] + self.realgoal
        ee_point_eucledian = np.linalg.norm(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])
        #return self.ob, self.reward, done, ee_point,{}

        # #calculate more sophisticated distance measurement
        # self.ee_point_matrix.append(np.array(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]))
        # # print("\nself.ee_point_matrix: ", self.ee_point_matrix)
        # if len(self.ee_point_matrix)>3:
        #     # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None)
        #     print("len(self.ee_point_matrix): ",len(self.ee_point_matrix), "self.ee_point_matrix: ", self.ee_point_matrix)
        # if len(self.ee_point_matrix)>10:
        #     self.ee_point_matrix = []
        return self.ob, self.reward, done, {}
    def addObstacle(self):
            rospy.wait_for_service('/gazebo/spawn_urdf_model')
            try:
                # #NORMAL BOX
                model_xml = "<?xml version=\"1.0\"?> \
                                    <robot name=\"myfirst\"> \
                                      <link name=\"world\"> \
                                      </link>\
                                      <link name=\"cylinder0\">\
                                        <visual>\
                                          <geometry>\
                                            <box size=\".1 .1 1\"/>\
                                          </geometry>\
                                          <origin xyz=\"0 0 0\"/>\
                                          <material name=\"blue\">\
                                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                                          </material>\
                                        </visual>\
                                        <inertial>\
                                          <mass value=\"10.\"/>\
                                          <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\
                                        </inertial>\
                                        <collision>\
                                          <geometry>\
                                            <box size=\".1 .1 1\"/>\
                                          </geometry>\
                                          <surface>\
                                            <contact>\
                                              <ode>\
                                                <min_depth>\"0.001\"</min_depth>\
                                                <max_depth>\"0.01\"</max_depth>\
                                                <kp>\"1e6\"</kp>\
                                                <kd>\"1.000000\"</kd>\
                                                <max_velocity>\"1.000000\"</max_velocity>\
                                              </ode>\
                                             </contact>\
                                          </surface>\
                                       </collision>\
                                      </link>\
                                      <joint name=\"world_to_base\" type=\"fixed\"> \
                                        <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\
                                        <parent link=\"world\"/>\
                                        <child link=\"cylinder0\"/>\
                                      </joint>\
                                      <gazebo reference=\"cylinder0\">\
                                        <material>Gazebo/Blue</material>\
                                      </gazebo>\
                                    </robot>"
                robot_namespace = ""
                pose = Pose()
                number = random.randint(0, 4)
                #pose.position.x = 0.15;#
                pose.position.x = 0.3;#
                pose.position.y = 0.07;#
                pose.position.z = 0.05;
                pose.orientation.x = 0;
                pose.orientation.y= 0;
                pose.orientation.z = 0;
                pose.orientation.w = 0;

                #BOX
                # model_xml = "<?xml version=\"1.0\"?> \
                #              <robot name=\"myfirst\"> \
                #                   <link name=\"world\"> \
                #                   </link>\
                #                   <link name=\"box0\">\
                #                     <visual>\
                #                       <geometry>\
                #                         <box>\
                #                             <size>0.2 0.2 1.0</size>\
                #                         </box>\
                #                       </geometry>\
                #                       <origin xyz=\"0 0 0\"/>\
                #                       <material name=\"blue\">\
                #                           <ambient>0.5 0.5 1.0 0.1</ambient>\
                #                           <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                #                       </material>\
                #                     </visual>\
                #                     <inertial>\
                #                       <mass value=\"5.0\"/>\
                #                       <inertia>\
                #                         <ixx>1.0</ixx>\
                #                         <ixy>0</ixy>\
                #                         <ixz>0</ixz>\
                #                         <iyy>1.0</iyy>\
                #                         <iyz>0</iyz>\
                #                         <izz>1.0</izz>\
                #                       </inertia>\
                #                     </inertial>\
                #                     <collision>\
                #                       <geometry>\
                #                         <box>\
                #                             <size>0.2 0.2 1.0</size>\
                #                         </box>\
                #                       </geometry>\
                #                     </collision>\
                #                   </link>\
                #                   <joint name=\"world_to_base\" type=\"fixed\"> \
                #                     <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\
                #                     <parent link=\"world\"/>\
                #                     <child link=\"box0\"/>\
                #                   </joint>\
                #                   <gazebo reference=\"box0\">\
                #                     <material>Gazebo/Blue</material>\
                #                   </gazebo>\
                #                 </robot>"

                # model_xml = "<?xml version=\"1.0\"?> \
                #             <robot name=\"myfirst\"> \
                #               <link name=\"world\"> \
                #               </link>\
                #               <link name=\"cylinder0\">\
                #                 <visual>\
                #                   <geometry>\
                #                     <sphere radius=\"0.01\"/>\
                #                   </geometry>\
                #                   <origin xyz=\"0 0 0\"/>\
                #                   <material name=\"rojotransparente\">\
                #                       <ambient>0.5 0.5 1.0 0.1</ambient>\
                #                       <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                #                   </material>\
                #                 </visual>\
                #                 <inertial>\
                #                   <mass value=\"5.0\"/>\
                #                   <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                #                 </inertial>\
                #               </link>\
                #               <joint name=\"world_to_base\" type=\"fixed\"> \
                #                 <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                #                 <parent link=\"world\"/>\
                #                 <child link=\"cylinder0\"/>\
                #               </joint>\
                #               <gazebo reference=\"cylinder0\">\
                #                 <material>Gazebo/GreenTransparent</material>\
                #               </gazebo>\
                #             </robot>"

                # robot_namespace = ""
                # pose = Pose()
                # number = random.randint(0, 4)
                # # if number == 1:
                # #     pose.position.x = 0.25;
                # #     pose.position.y = 0.07;
                # # if number == 2:
                # #     pose.position.x = 0.25;
                # #     pose.position.y = -0.07;
                # # if number == 3:
                # #     pose.position.x = 0.23;
                # #     pose.position.y = -0.087;
                # # if number == 4:
                # #     pose.position.x = 0.23;
                # #     pose.position.y = 0.087;
                #
                # pose.position.x = 0.25;#
                # pose.position.y = 0.07;#
                #
                # pose.position.z = 0.0;
                # pose.orientation.x = 0;
                # pose.orientation.y= 0;
                # pose.orientation.z = 0;
                # pose.orientation.w = 0;
                reference_frame = ""
                if number > -1:
                  self.add_model(model_name="obstacle",
                                model_xml=model_xml,
                                robot_namespace=robot_namespace,
                                initial_pose=pose,
                                reference_frame="")
            except (rospy.ServiceException) as e:
                print ("/gazebo/spawn_urdf_model service call failed")

    def removeObstacle(self):
            rospy.wait_for_service('/gazebo/delete_model')
            try:
                self.remove_model(model_name="obstacle")
            except (rospy.ServiceException) as e:
                print ("/gazebo/spawn_urdf_model service call failed")

    def _reset(self):
        # """
        # Reset the agent for a particular experiment condition.
        # """
        self.iterator = 0
        msg = self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])
        msg.points[0].time_from_start.secs = 0
        msg.points[0].time_from_start.nsecs = 10000
        self._pub.publish(msg)

        # time.sleep(int(self.environment['slowness'])) # using seconds
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            time.sleep(int(self.slowness))
        elif(self.slowness_unit == 'nsec'):
            time.sleep(int(self.slowness/1000000000)) # using nanoseconds
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
class GazeboMARATopCollisionv0Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms, this should reflect the lstm step size, otherwqise it restarts two times
        self.rand_target_thresh = 40
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self._collision_msg = None

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach

        EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466 + 0.1])  # alex2
        # EE_POS_TGT = np.asmatrix([-0.386752, -0.000756, 1.40557]) # easy point
        # EE_POS_TGT = np.asmatrix([-0.173762, -0.0124312, 1.60415]) # for testing collision_callback
        # EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar
        # EE_ROT_TGT = np.asmatrix([[-0.00128296,  0.9999805 ,  0.00611158],
        #                            [ 0.00231397, -0.0061086 ,  0.99997867],
        #                            [ 0.9999965 ,  0.00129708, -0.00230609]])
        # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis
        # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981])
        # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis

        # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H
        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])

        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238,  0.31536287], [0.51531424,  0.85207952,  0.09171542], [-0.31601302,  0.08944959,  0.94452874]]) # original orientation
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.])
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'
        LINK_STATE_PUBLISHER = '/gazebo/set_link_state'

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'
        MOTOR6_JOINT = 'motor6'

        # Set constants for links
        TABLE = 'table'

        BASE = 'base_link'

        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'

        # EE_LINK = 'ee_link'
        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT, MOTOR6_JOINT
        ]
        LINK_NAMES = [
            TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK,
            MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = rospkg.RosPack().get_path(
            "mara_description") + "/urdf/mara_demo_camera_top.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt
        self.target_orientation = ee_rot_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'ee_point_tgt_orient': self.target_orientation,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}
        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher(JOINT_PUBLISHER,
                                    JointTrajectory,
                                    queue_size=1)
        self._sub = rospy.Subscriber(JOINT_SUBSCRIBER,
                                     JointTrajectoryControllerState,
                                     self.observation_callback)
        self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState,
                                          self.collision_callback)
        self._pub_link_state = rospy.Publisher(LINK_STATE_PUBLISHER,
                                               LinkState,
                                               queue_size=1)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        recently also added quaternion to the obs, which has dimension=4
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 9  #7 #6 hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = -low
        self.action_space = spaces.Box(low, high, dtype=np.float32)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.add_model_urdf = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                                 SpawnModel)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = EE_POS_TGT[0, 2]
        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = "world"

        self.assets_path = os.path.abspath(
            os.path.join(rospkg.RosPack().get_path("gazebo_domain_randomizer"),
                         os.pardir)) + "/assets"

        file_xml = open(self.assets_path + '/models/urdf/target_point.urdf',
                        mode='r')
        model_xml = file_xml.read()
        file_xml.close()

        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        try:
            self.add_model_urdf(model_name="target",
                                model_xml=model_xml,
                                robot_namespace=robot_namespace,
                                initial_pose=pose,
                                reference_frame=reference_frame)
        except rospy.ServiceException as e:
            print('Error adding urdf model')

        self._seed()

    def collision_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """

        if message.collision1_name != message.collision2_name:
            # neither obj nor obstacle colliding with table
            if "obj::" not in message.collision1_name and "obstacle" not in message.collision1_name or "table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
                # neither obj colliding with obstacle and vice-versa nor objs colliding each other nor obstacles colliding each other
                if "obj::" not in (
                        message.collision1_name and message.collision2_name
                ) and "obstacle" not in (message.collision1_name
                                         and message.collision2_name):
                    self._collision_msg = message

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        obs_message = self._observation_msg
        if obs_message is None:
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])

            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # current_quaternion = np.array([angle]+dir.tolist())#

            # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here
            # current_quaternion = quaternion_from_matrix(rotation_matrix)
            # tgt_quartenion = quaternion_from_matrix(self.target_orientation)
            current_quaternion = tf.quaternions.mat2quat(rot)  #[w, x, y ,z]
            tgt_quartenion = tf.quaternions.mat2quat(self.target_orientation)

            # A  = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T
            #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0]

            quat_error = current_quaternion * tgt_quartenion.conjugate()
            rot_vec_err, _ = tf.quaternions.quat2axangle(quat_error)

            # convert quat to np arrays
            # quat_error = np.asarray(quat_error, dtype=np.quaternion).view((np.double, 4))

            # RK:  revisit this later, we only take one angle difference here!
            # angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.))

            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(rot_vec_err, -1),
                          np.reshape(ee_velocities, -1), ]
            return state

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - action
            - observation
            - reward
            - done (status)
        """
        self.iterator += 1

        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])

        if self._collision_msg is not None:
            if self._collision_msg.collision1_name is None:
                raise AttributeError("collision1_name is None")
            if self._collision_msg.collision2_name is None:
                raise AttributeError("collision2_name is None")

            self.reward = self.reward_dist * 2.0

            # Resets the state of the environment and returns an initial observation.
            # we should avoid this --> huge bottleneck
            rospy.wait_for_service('/gazebo/reset_simulation')
            try:
                self.reset_proxy()

                pose = Pose()
                pose.position.x = self.realgoal[0]
                pose.position.y = self.realgoal[1]
                pose.position.z = self.realgoal[2]
                pose.orientation.x = 0
                pose.orientation.y = 0
                pose.orientation.z = 0
                pose.orientation.w = 0
                self._pub_link_state.publish(
                    LinkState(link_name="target_link",
                              pose=pose,
                              reference_frame="world"))

            except (rospy.ServiceException) as e:
                print("/gazebo/reset_simulation service call failed")

        else:
            # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
            # here is the distance block
            if abs(self.reward_dist) < 0.005:
                self.reward = 1 + self.reward_dist  # Make the reward increase as the distance decreases
                print("Reward dist is: ", self.reward)
            else:
                self.reward = self.reward_dist

        done = bool((abs(self.reward_dist) < 0.005)
                    or (self.iterator > self.max_episode_steps))

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        self._collision_msg = None
        return self.ob, self.reward, done, {}

    def reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        self.iterator = 0

        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))

            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 6
0
class GazeboMARATopOrientVisionv0Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "MARATop3DOF_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self.detect_target_once = 1

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis
        # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981])
        EE_POS_TGT = np.asmatrix([
            -0.53170885, -0.02076771, 0.74240961
        ])  # 200 cm from the z axis some random target at the begining
        # EE_POS_TGT = np.asmatrix([pose_rubik_pred.position.x, pose_rubik_pred.position.y, pose_rubik_pred.position.z])
        # print("EE_POS_TGT: ", EE_POS_TGT)

        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H
        # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238,  0.31536287], [0.51531424,  0.85207952,  0.09171542], [-0.31601302,  0.08944959,  0.94452874]])
        # EE_POS_TGT = np.asmatrix([0.0, 0.0, 1.4])
        # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

        EE_ROT_TGT = np.asmatrix([[-0.99500678, 0.09835458, -0.01696725],
                                  [-0.09951792, -0.99061751, 0.09366505],
                                  [-0.00759566, 0.0948859, 0.99545918]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        # INITIAL_JOINTS = np.array([0., 0., 1.57, 0., 1.57, 0.])
        # INITIAL_JOINTS = np.array([0., 0., 1.37, 0., 1.37, 0.])
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        # INITIAL_JOINTS = np.array([2.832116288509212e-05, -7.644633833070458e-05, -0.9999138952294953, -2.4499067147409903e-05, -1.5700625461089226, 1.4725492722966749e-05])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'
        MOTOR6_JOINT = 'motor6'

        # Set constants for links
        TABLE = 'table'

        BASE = 'base_link'

        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'

        # EE_LINK = 'ee_link'
        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT, MOTOR6_JOINT
        ]
        LINK_NAMES = [
            TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK,
            MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = rospkg.RosPack().get_path(
            "mara_description") + "/urdf/mara_demo_camera_top.urdf"
        # URDF_PATH = "/home/rkojcev/catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        print("ee_tgt: ", ee_tgt)
        self.realgoal = ee_tgt
        self.target_orientation = ee_rot_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'ee_point_tgt_orient': self.target_orientation,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory)
        self._sub = rospy.Subscriber(JOINT_SUBSCRIBER,
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        TARGET_SUBSCRIBER = '/mara/target'
        self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose,
                                         self.tgt_callback)
        # Instantiate CvBridge
        # self.bridge = CvBridge()
        # self._sub_image = rospy.Subscriber("/mara/rgb/image_raw", ImageMsg, self._observation_image_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        recently also added quaternion to the obs, which has dimension=4
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 10  #6 hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # high = np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model',
                                                SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                               DeleteModel)
        self.pub_set_model = rospy.Publisher('/gazebo/set_model_state',
                                             ModelState,
                                             queue_size=1)

        self.addTarget()

        # Seed the environment
        # Seed the environment
        self._seed()

    def tgt_callback(self, msg):
        # print("Whats the target?: ", msg)
        # self.realgoal is None and self.target_orientation is None:

        if self.detect_target_once is 1:
            print("Get the target from vision, for now just use position.")
            EE_POS_TGT = np.asmatrix(
                [msg.position.x, msg.position.y, msg.position.z])
            rot_quat = np.quaternion(msg.orientation.x, msg.orientation.y,
                                     msg.orientation.z, msg.orientation.w)
            print("rot_quat: ", rot_quat)
            rot_matrix = quat.as_rotation_matrix(rot_quat)
            print("rot_matrix: ", rot_matrix)
            EE_ROT_TGT = rot_matrix  #np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            # rot_matrix
            # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238,  0.31536287], [0.51531424,  0.85207952,  0.09171542], [-0.31601302,  0.08944959,  0.94452874]]) #rot_matrix#
            EE_POINTS = np.asmatrix([[0, 0, 0]])
            ee_pos_tgt = EE_POS_TGT

            # leave rotation target same since in scara we do not have rotation of the end-effector
            ee_rot_tgt = EE_ROT_TGT
            target1 = np.ndarray.flatten(
                get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

            # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

            self.realgoal = target1
            self.target_orientation = ee_rot_tgt
            print("Predicted target is: ", self.realgoal)
            self.detect_target_once = 0

            self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                                SpawnModel)
            self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                                   DeleteModel)

            model_xml = "<?xml version=\"1.0\"?> \
                        <robot name=\"myfirst\"> \
                          <link name=\"world\"> \
                          </link>\
                          <link name=\"sphere0\">\
                            <visual>\
                              <geometry>\
                                <sphere radius=\"0.01\"/>\
                              </geometry>\
                              <origin xyz=\"0 0 0\"/>\
                              <material name=\"rojotransparente\">\
                                  <ambient>0.5 0.5 1.0 0.1</ambient>\
                                  <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                              </material>\
                            </visual>\
                            <inertial>\
                              <mass value=\"5.0\"/>\
                              <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                            </inertial>\
                          </link>\
                          <joint name=\"world_to_base\" type=\"fixed\"> \
                            <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                            <parent link=\"world\"/>\
                            <child link=\"sphere0\"/>\
                          </joint>\
                          <gazebo reference=\"sphere0\">\
                            <material>Gazebo/GreenTransparent</material>\
                          </gazebo>\
                        </robot>"

            robot_namespace = ""
            pose = Pose()
            pose.position.x = EE_POS_TGT[0, 0]
            pose.position.y = EE_POS_TGT[0, 1]
            pose.position.z = EE_POS_TGT[0, 2]

            #Static obstacle (not in original code)
            # pose.position.x = 0.25;#
            # pose.position.y = 0.07;#
            # pose.position.z = 0.0;#

            pose.orientation.x = 0
            pose.orientation.y = 0
            pose.orientation.z = 0
            pose.orientation.w = 0
            reference_frame = ""
            rospy.wait_for_service('/gazebo/spawn_urdf_model')
            self.add_model(model_name="target",
                           model_xml=model_xml,
                           robot_namespace="",
                           initial_pose=pose,
                           reference_frame="")

    def addTarget(self):
        # The idea is to add random target in our case rubik cube and the vision system to detect and find the 3D pose of the cube.
        # Open a file: file
        # os.chdir('../assets/urdf/rubik_cube')
        # print("os: ", os)
        file = open(
            '/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/rubik_cube/rubik_cube.sdf',
            mode='r')
        # read all lines at once
        model_xml = file.read()
        # close the file
        file.close()

        rospy.wait_for_service('/gazebo/spawn_urdf_model')

        pose = Pose()

        pose.position.x = -0.5074649153217804  #-0.5074649153217804#random.uniform(-0.3, -0.6);
        pose.position.y = random.uniform(
            -0.02, 0.01)  #0.03617460539210797#random.uniform(-0.02, 0.01)
        # stay put in Z!!!
        pose.position.z = 0.72  #0.72#0.80 #0.72;

        roll = 0.0  #random.uniform(-0.2, 0.6)
        pitch = 0.0  #random.uniform(-0.2, 0.2)
        yaw = 0.0  #-0.3#random.uniform(-0.3, 0.3)
        new_camera_pose = False
        q_rubik = quat.from_euler_angles(roll, pitch, yaw)
        # print("q_rubik: ", q_rubik.x, q_rubik.y, q_rubik.z, q_rubik.w)

        pose.orientation.x = q_rubik.x  #0.0#q_rubik[0]
        pose.orientation.y = q_rubik.y  #0.0#q_rubik[1]
        pose.orientation.z = q_rubik.z  #0.0#q_rubik[2]
        pose.orientation.w = q_rubik.w  #0.0#q_rubik[3]

        print("Real pose is: ", pose)
        try:

            self.add_model_sdf(model_name="puzzle_ball_joints",
                               model_xml=model_xml,
                               robot_namespace="",
                               initial_pose=pose,
                               reference_frame="")
            print("service call ok")
        except:
            print('error adding model')

        self.pub_set_model.publish(
            ModelState(model_name='puzzle_ball_joints',
                       pose=pose,
                       reference_frame="world"))
        # try:
        #     self.removeTargetSphere()
        # except:
        #     print('error removing sphere')

    def removeTarget(self):
        rospy.wait_for_service('/gazebo/delete_model')
        try:
            self.remove_model(model_name="puzzle_ball_joints")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def removeTargetSphere(self):
        rospy.wait_for_service('/gazebo/delete_model')
        try:
            self.remove_model(model_name="target")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def setTargetPositions(self, msg):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the first half H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287],
                                  [0.51531424, 0.85207952, 0.09171542],
                                  [-0.31601302, 0.08944959, 0.94452874]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTargetPositions(self):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the first half H-ROS letters with +-0.01 offset
        EE_POS_TGT_RANDOM2 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1
        ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTarget(self):
        print("calling randomize target")

        EE_POS_TGT_1 = np.asmatrix([-0.189383, -0.123176, 0.894476])  # point 1
        EE_POS_TGT_2 = np.asmatrix([-0.359236, 0.0297278, 0.760402])  # point 2
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeMultipleTargets(self):
        print("calling randomize multiple target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.2868])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.2868])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            tgt_quartenion = quaternion_from_matrix(self.target_orientation)

            A = np.vstack(
                [current_quaternion,
                 np.ones(len(current_quaternion))]).T

            #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0]

            quat_error = current_quaternion - tgt_quartenion
            # print("quat_error: ",quat_error)
            # print("self.realgoal: ", self.realgoal)
            # print("curr quat: ", current_quaternion)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(quat_error, -1),
                          np.reshape(ee_velocities, -1), ]
            #print("quat_error: ", quat_error)
            #print("ee_points:", ee_points)
            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(quat_error, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        # rmse_trans = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])
        # rmse_orient = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])
        # # print("rmse_orient: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])
        # self.reward_dist = -rmse_trans
        # self.reward_orient = -rmse_orient
        #                     # + self.ob[(self.scara_chain.getNrOfJoints()+4)] )
        #
        # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        # if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
        #     self.reward_final_dist = 1 + self.reward_dist # Make the reward increase as the distance decreases
        #     print("Reward Pose is: ", self.reward_final_dist)
        # else:
        #     self.reward_final_dist = self.reward_dist

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])
        self.reward_orient = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints() +
                    3:(self.scara_chain.getNrOfJoints() + 7)])
        # print("self.reward_orient: ", self.reward_orient)

        #scale here the orientation because it should not be the main bias of the reward, position should be
        orientation_scale = 0.2

        # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)]) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() +
                    3)])  # Make the reward increase as the distance decreases
            print("Reward position is: ", self.reward)
        else:
            self.reward = self.reward_dist
        #
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints() + 3:(
                self.scara_chain.getNrOfJoints() + 7)]) < 0.1):
            self.reward = self.reward + orientation_scale * (
                1 - self.rmse_func(
                    self.ob[self.scara_chain.getNrOfJoints() + 3:
                            (self.scara_chain.getNrOfJoints() + 7)]))
            print("Reward orientation is: ", self.reward)
        else:
            self.reward = self.reward + orientation_scale * self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints() + 3:
                        (self.scara_chain.getNrOfJoints() + 7)])

        #this is very hard to converge
        # self.reward = self.reward_dist + orientation_scale*self.reward_orient
        #
        # if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])<0.1 and self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
        #     self.reward = 10.*((1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])) + orientation_scale * (1 -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])))
        #     print("Reward hit the target, and is: ", self.reward)
        # # # self.reward = self.reward_final_dist + orientation_scale*self.final_rew_orient

        # self.reward =self.reward - abs(self.ob[(self.scara_chain.getNrOfJoints()+4)])
        # Calculate if the env has been solved
        done = bool(((abs(self.reward_dist) < 0.005) and
                     (abs(self.reward_orient)) < 0.1)
                    or (self.iterator > self.max_episode_steps))

        # Execute "action"
        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # # this is workout for the collision enviroment, does not work properly till now.
        # if done:
        #     self.goToInit()
        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def goToInit(self):
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions'][
                'initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action - now_action, float(np.inf))
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
                time.sleep(3)

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """

        self.iterator = 0

        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 7
0
class GazeboModularScara3DOFv2Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and ROS 2 interfaces,
                    for now communicating throught a bridge.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746])  # center of O
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0.0, 0.0, 0.0])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.
        # # Set the number of seconds per step of a sample.
        # TIMESTEP = 0.01  # Typically 0.01.
        # # Set the number of timesteps per sample.
        # STEP_COUNT = 100  # Typically 100.
        # # Set the number of samples per condition.
        # SAMPLE_COUNT = 5  # Typically 5.
        # # Set the number of trajectory iterations to collect.
        # ITERATIONS = 20  # Typically 10.
        # How much time does it take to execute the trajectory (in seconds)
        slowness = 1000000  # 1 is real life simulation
        # slowness = 1

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        #URDF_PATH = "../assets/urdf/modular_scara/scara_e1_3joints.urdf"

        import rospkg

        r = rospkg.RosPack()
        path = r.get_path('scara_e1_description')
        URDF_PATH = path + "/urdf/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.environment = {
            # 'dt': TIMESTEP,
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions
        # rclpy.init(args=None)
        # self.ros2_node = rclpy.create_node('robot_ai_node')
        # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER)
        # # self._callbacks = partial(self.observation_callback, robot_id=0)
        # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data)
        # # self._time_lock = threading.RLock()

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = 9  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)

        # Seed the environment
        self._seed()

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message
        # print("!!!!!!!!!Received an observation")

    # def discretize_observation(self,data,new_ranges):
    #     discretized_ranges = []
    #     min_range = 0.2
    #     done = False
    #     mod = len(data.ranges)/new_ranges
    #     for i, item in enumerate(data.ranges):
    #         if (i%mod==0):
    #             if data.ranges[i] == float ('Inf') or np.isinf(data.ranges[i]):
    #                 discretized_ranges.append(6)
    #             elif np.isnan(data.ranges[i]):
    #                 discretized_ranges.append(0)
    #             else:
    #                 discretized_ranges.append(int(data.ranges[i]))
    #         if (min_range > data.ranges[i] > 0):
    #             done = True
    #     return discretized_ranges,done

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()

        action_float = [float(i) for i in action]

        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        # target.time_from_start.secs = self.environment['slowness']
        target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(3)
        # Initialize a joint array for the present 6 joint angles.
        angles = JntArray(3)
        # Construct the joint array from the most recent joint angles.
        for i in range(3):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, 3)
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    # def take_observation_ros2(self):
    #     """
    #     Take observation from the environment and return it.
    #     TODO: define return type
    #     """
    #     # Take an observation
    #     if rclpy.ok():
    #         # Only read and process ROS messages if they are fresh.
    #         # TODO: review, robot_id seems specific to Risto's implementation
    #
    #         # # Acquire the lock to prevent the subscriber thread from
    #         # # updating times or observation messages.
    #         # self._time_lock.acquire(True)
    #         obs_message = self._observation_msg
    #
    #         # Collect the end effector points and velocities in
    #         # cartesian coordinates for the state.
    #         # Collect the present joint angles and velocities from ROS for the state.
    #         last_observations = self.process_observations(obs_message, self.environment)
    #         if last_observations is None:
    #             print("last_observations is empty")
    #         else:
    #         # # # Get Jacobians from present joint angles and KDL trees
    #         # # # The Jacobians consist of a 6x6 matrix getting its from from
    #         # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
    #             ee_link_jacobians = self.get_jacobians(last_observations)
    #             if self.environment['link_names'][-1] is None:
    #                 print("End link is empty!!")
    #             else:
    #                 # print(self.environment['link_names'][-1])
    #                 trans, rot = forward_kinematics(self.scara_chain,
    #                                             self.environment['link_names'],
    #                                             last_observations[:3],
    #                                             base_link=self.environment['link_names'][0],
    #                                             end_link=self.environment['link_names'][-1])
    #                 # #
    #                 rotation_matrix = np.eye(4)
    #                 rotation_matrix[:3, :3] = rot
    #                 rotation_matrix[:3, 3] = trans
    #                 # angle, dir, _ = rotation_from_matrix(rotation_matrix)
    #                 # #
    #                 # current_quaternion = np.array([angle]+dir.tolist())#
    #
    #                 # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
    #                 current_quaternion = quaternion_from_matrix(rotation_matrix)
    #
    #                 current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
    #                                                                   trans,
    #                                                                   rot).T)
    #                 ee_points = current_ee_tgt - self.environment['ee_points_tgt']
    #
    #                 ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
    #                                                                        self.environment['end_effector_points'],
    #                                                                        rot)
    #                 ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
    #                                                                self.environment['end_effector_points'],
    #                                                                rot,
    #                                                                last_observations)
    #
    #                 #
    #                 # Concatenate the information that defines the robot state
    #                 # vector, typically denoted asrobot_id 'x'.
    #                 state = np.r_[np.reshape(last_observations, -1),
    #                               np.reshape(ee_points, -1),
    #                               np.reshape(ee_velocities, -1),]
    #
    #                 return np.r_[np.reshape(last_observations, -1),
    #                               np.reshape(ee_points, -1),
    #                               np.reshape(ee_velocities, -1),]
    #     else:
    #         print("Observation is None")
    #         return None

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        # # ROS 2, Acquire the lock to prevent the subscriber thread from
        # # updating times or observation messages.
        # self._time_lock.acquire(True)
        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        #print("last_observations", last_observations)######

        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:3],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])

            #print("trans", trans)######

            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        # print("RMSE", rmse) #####
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - observation
            - reward
            - done (status)
            - dictionary (#TODO clarify)
        """
        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Execute "action"
        # if rclpy.ok(): # ROS 2 code

        #print("self.get_trajectory_message(action[:3])", self.get_trajectory_message(action[:3]))#########

        self._pub.publish(self.get_trajectory_message(action[:3]))
        #TODO: wait until action gets executed
        # time.sleep(int(self.environment['slowness'])) # using seconds
        time.sleep(int(self.environment['slowness']) /
                   1000000000)  # using nanoseconds

        #print("Before action observation", self.ob[:3])#######

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        #print("action", action[:3])#######
        #print("After action observation", self.ob[:3])#######

        # Pause simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            #resp_pause = pause.call()
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # # Old implementation for reward calculation
        # # Calculate reward based on observation
        # if np.linalg.norm(self.ob[1]) < 0.005:
        #     self.reward_dist = 1000.0 * np.linalg.norm(self.ob[1])#- 10.0 * np.linalg.norm(ee_points)
        #     # self.reward_ctrl = np.linalg.norm(action)#np.square(action).sum()
        #     done = True
        #     print("self.reward_dist: ", self.reward_dist, "self.reward_ctrl: ", self.reward_ctrl)
        # else:
        #     self.reward_dist = - np.linalg.norm(self.ob[1])
        #     # self.reward_ctrl = - np.linalg.norm(action)# np.square(action).sum()
        # # self.reward = 2.0 * self.reward_dist + 0.01 * self.reward_ctrl
        # #removed the control reward, maybe we should add it later.
        # self.reward = self.reward_dist

        # Calculate the reward
        #  Heuristic used for the calculation of the reward:
        #   - calculate the residual mean square error (rmse) between the current
        #       end effector point and the target point
        #   - the reward is the result of the difference between 2 and the rmse absolute value
        #       this means that the reward increase as the distance decreases.
        # print("ob :", self.ob)
        # print("obs[3:6] :", self.ob[3:6])
        self.reward_dist = self.rmse_func(self.ob[3:6])
        # print("reward_dist :", self.reward_dist)
        self.reward = 1 - self.reward_dist  # Make the reward increase as the distance decreases
        # print("reward: ", self.reward)

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005)

        # self._time_lock.release() # ROS 2 code

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        # Resets the state of the environment and returns an initial observation.
        rospy.wait_for_service('/gazebo/reset_simulation')
        try:
            #reset_proxy.call()
            self.reset_proxy()
        except (rospy.ServiceException) as e:
            print("/gazebo/reset_simulation service call failed")

        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Go to initial position and wait until it arrives there
        self._pub.publish(
            self.get_trajectory_message(
                self.environment['reset_conditions']['initial_positions']))
        ## time.sleep(int(self.environment['slowness'])) # using seconds
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds
        time.sleep(1)

        # # Use moveit to return to the original position
        # import moveit_commander
        # import moveit_msgs.msg
        # import geometry_msgs.msg
        # # print "============ Starting tutorial setup"
        # moveit_commander.roscpp_initialize(sys.argv)
        # # rospy.init_node('move_group_python_interface_tutorial',
        # #                 anonymous=True)
        # robot = moveit_commander.RobotCommander()
        # scene = moveit_commander.PlanningSceneInterface()
        # group = moveit_commander.MoveGroupCommander("scara_arm")
        # group.clear_pose_targets()
        # roup_variable_values = group.get_current_joint_values()
        # # print "============ Joint values: ", group_variable_values
        # group_variable_values[0] = 0.0
        # group_variable_values[1] = 0.0
        # group_variable_values[2] = 0.0
        # group.set_joint_value_target(group_variable_values)
        # plan2 = group.plan()

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Pause the simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Return the corresponding observation
        return self.ob
Esempio n. 8
0
class GazeboModularScara3DOFv4Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self,
                 reward_type='sparse',
                 distance_threshold=0.005,
                 n_substeps=20,
                 relative_control=False):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # limit the max episode step
        self._max_episode_steps = self.max_episode_steps
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to 1ms, since ddpg is slow
        self.slowness = 1000000
        self.slowness_unit = 'nsec'
        self.reset_jnts = True
        self.choose_robot = 0
        self.reward_type = reward_type
        self.distance_threshold = distance_threshold
        self.n_substeps = n_substeps
        self.relative_control = relative_control

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746])  # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        # EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.3746]) #center of S
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high, dtype=np.float32)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        # Seed the environment
        self._seed()

        # self.max_steps_episode = 1000
    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def randomizeTargetPositions(self):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.3746
        ])  # boundry box of the first half H-ROS letters with +-0.01 offset
        EE_POS_TGT_RANDOM2 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.3746
        ])  # boundry box of the H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1
        ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTarget(self):
        print("calling randomize target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.3746])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.3746])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeMultipleTargets(self):
        print("calling randomize multiple targets")

        EE_POS_TGT_1 = np.asmatrix([0.3305805, -0.1326121,
                                    0.3746])  # center of the H
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.0985179,
                                    0.3746])  # center of H right
        EE_POS_TGT_3 = np.asmatrix([0.3325683, 0.0657366,
                                    0.3746])  # center of O
        EE_POS_TGT_4 = np.asmatrix([0.3355224, 0.0344309,
                                    0.3746])  # center of O left
        EE_POS_TGT_5 = np.asmatrix([0.3013209, 0.1647450,
                                    0.3746])  # S top right
        EE_POS_TGT_6 = np.asmatrix([0.3349774, 0.1570571, 0.3746])  # S midlle

        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2
        ee_pos_tgt_3 = EE_POS_TGT_3
        ee_pos_tgt_4 = EE_POS_TGT_4
        ee_pos_tgt_5 = EE_POS_TGT_5
        ee_pos_tgt_6 = EE_POS_TGT_6

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        target3 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_3, ee_rot_tgt).T)
        target4 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_4, ee_rot_tgt).T)
        target5 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_5, ee_rot_tgt).T)
        target6 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_6, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        all_targets = [target1, target2, target3, target4, target5, target6]
        self.realgoal = random.choice(all_targets)
        # self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeCorrect realgoal: ", self.realgoal)

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            # return np.r_[np.reshape(last_observations, -1),
            #               np.reshape(ee_points, -1),
            #               np.reshape(ee_velocities, -1),]

            return {
                'observation': np.reshape(last_observations, -1),
                'achieved_goal': current_ee_tgt,
                'desired_goal': self.realgoal,
                'distance_from_goal': ee_points,
            }

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def goal_distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        # print("goal_distance", np.linalg.norm(goal_a - goal_b, axis=-1))
        return np.linalg.norm(goal_a - goal_b, axis=-1)

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Compute distance between goal and the achieved goal.
        d = self.goal_distance(achieved_goal, desired_goal)
        print("compute_reward:", d)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        self.reward_dist = -self.rmse_func(self.ob['distance_from_goal'])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob['distance_from_goal']) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob['distance_from_goal']
            )  # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        done = (bool(abs(self.reward_dist) < 0.005)) or (
            self.iterator > self.max_episode_steps)

        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))

        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # print("in step, action: ", action[:3])
        # print("in step, observation: ", self.ob[:3])

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def _reset(self):

        self.iterator = 0
        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
class GazeboModularScara4And3DOFv1Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara4_3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables

        self.urdf_path = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_model_4_and_3joints.urdf"

        self.slowness = 1
        self.slowness_unit = 'sec'

        #self.slowness = 10000000
        #self.slowness_unit = 'nsec'

        #choose random enviroment at the startup
        self.randomizeRobot()
        # Seed the environment
        self._seed()

    def init_3dof_robot(self):
        print("I am in enviroment 3DOF")
        self._observation_msg = None
        print(self._observation_msg)
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0

        self._time_lock = threading.RLock()
        self.choose_robot = 0
        self.environment = None

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121,
                                  0.3746])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'reset_conditions': reset_condition,
            'tree_path': self.urdf_path,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }
        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # Topics for the robot publisher and subscriber.
        # self.enviroment = None
        JOINT_PUBLISHER = '/scara_controller_3dof/command'
        JOINT_SUBSCRIBER = '/scara_controller_3dof/state'

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub_3dof = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory)
        self._sub_3dof = rospy.Subscriber(JOINT_SUBSCRIBER,
                                          JointTrajectoryControllerState,
                                          self.observation_callback)

    def init_4dof_robot(self):
        print("I am in enviroment 4DOF")
        self._observation_msg_4dof = None
        print(self._observation_msg_4dof)
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0
        self.reset_jnts = False
        self._time_lock = threading.RLock()
        self.choose_robot = 1
        self.environment = None

        #############################
        #   Environment hyperparams
        #############################

        ########################################################################################
        '''
        Add here the stuff needed for the 4DOF robot
        '''

        EE_POS_TGT_4DOF = np.asmatrix([0.3305805, -0.1326121,
                                       0.4868])  # center of the H
        EE_ROT_TGT_4DOF = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS_4DOF = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES_4DOF = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS_4DOF = np.array([0., 0., 0., 0.])

        # JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command'
        # JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state'
        # joint names:
        MOTOR1_4DOF_JOINT = 'motor1_4dof'
        MOTOR2_4DOF_JOINT = 'motor2_4dof'
        MOTOR3_4DOF_JOINT = 'motor3_4dof'
        MOTOR4_4DOF_JOINT = 'motor4_4dof'

        # Set constants for links
        BASE_4DOF = '4_dof_scara_e1_base_link'
        BASE_4DOF_MOTOR = '4_dof_scara_e1_base_motor'

        SCARA_4DOF_MOTOR1 = '4_dof_scara_e1_motor1'
        SCARA_4DOF_INSIDE_MOTOR1 = '4_dof_scara_e1_motor1_inside'
        SCARA_4DOF_SUPPORT_MOTOR1 = '4_dof_scara_e1_motor1_support'
        SCARA_4DOF_BAR_MOTOR1 = '4_dof_scara_e1_bar1'
        SCARA_4DOF_FIXBAR_MOTOR1 = '4_dof_scara_e1_fixbar1'

        SCARA_4DOF_MOTOR2 = '4_dof_scara_e1_motor2'
        SCARA_4DOF_INSIDE_MOTOR2 = '4_dof_scara_e1_motor2_inside'
        SCARA_4DOF_SUPPORT_MOTOR2 = '4_dof_scara_e1_motor2_support'
        SCARA_4DOF_BAR_MOTOR2 = '4_dof_scara_e1_bar2'
        SCARA_4DOF_FIXBAR_MOTOR2 = '4_dof_scara_e1_fixbar2'

        SCARA_4DOF_MOTOR3 = '4_dof_scara_e1_motor3'
        SCARA_4DOF_INSIDE_MOTOR3 = '4_dof_scara_e1_motor3_inside'
        SCARA_4DOF_SUPPORT_MOTOR3 = '4_dof_scara_e1_motor3_support'
        SCARA_4DOF_BAR_MOTOR3 = '4_dof_scara_e1_bar3'
        SCARA_4DOF_FIXBAR_MOTOR3 = '4_dof_scara_e1_fixbar3'

        SCARA_4DOF_MOTOR4 = '4_dof_scara_e1_motor4'
        SCARA_4DOF_INSIDE_MOTOR4 = '4_dof_scara_e1_motor4_inside'
        SCARA_4DOF_SUPPORT_MOTOR4 = '4_dof_scara_e1_motor4_support'
        SCARA_4DOF_BAR_MOTOR4 = '4_dof_scara_e1_bar4'
        SCARA_4DOF_FIXBAR_MOTOR4 = '4_dof_scara_e1_fixbar4'

        SCARA_4DOF_RANGEFINDER = '4_dof_scara_e1_rangefinder'

        EE_LINK_4DOF = '4_dof_ee_link'
        JOINT_ORDER_4DOF = [
            MOTOR1_4DOF_JOINT, MOTOR2_4DOF_JOINT, MOTOR3_4DOF_JOINT,
            MOTOR4_4DOF_JOINT
        ]
        LINK_NAMES_4DOF = [
            BASE_4DOF, BASE_4DOF_MOTOR, SCARA_4DOF_MOTOR1,
            SCARA_4DOF_INSIDE_MOTOR1, SCARA_4DOF_SUPPORT_MOTOR1,
            SCARA_4DOF_BAR_MOTOR1, SCARA_4DOF_FIXBAR_MOTOR1, SCARA_4DOF_MOTOR2,
            SCARA_4DOF_INSIDE_MOTOR2, SCARA_4DOF_SUPPORT_MOTOR2,
            SCARA_4DOF_BAR_MOTOR2, SCARA_4DOF_FIXBAR_MOTOR2, SCARA_4DOF_MOTOR3,
            SCARA_4DOF_INSIDE_MOTOR3, SCARA_4DOF_SUPPORT_MOTOR3,
            SCARA_4DOF_BAR_MOTOR3, SCARA_4DOF_FIXBAR_MOTOR3, SCARA_4DOF_MOTOR4,
            SCARA_4DOF_INSIDE_MOTOR4, SCARA_4DOF_SUPPORT_MOTOR4, EE_LINK_4DOF
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS_4DOF,
            'initial_velocities': []
        }
        #################################################################################################################################################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"

        m_joint_order = copy.deepcopy(JOINT_ORDER_4DOF)
        m_link_names = copy.deepcopy(LINK_NAMES_4DOF)
        ee_pos_tgt = EE_POS_TGT_4DOF
        ee_rot_tgt = EE_ROT_TGT_4DOF

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS_4DOF, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt

        self.environment = {
            'ee_points_tgt': self.realgoal,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'reset_conditions': reset_condition,
            'tree_path': self.urdf_path,
            # 'joint_publisher': m_joint_publishers,
            # 'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS_4DOF,
            'end_effector_velocities': EE_VELOCITIES_4DOF,
        }

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # Topics for the robot publisher and subscriber.
        # self.enviroment = None
        JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command'
        JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state'

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub_4dof = rospy.Publisher('/scara_controller_4dof/command',
                                         JointTrajectory)
        self._sub_4dof = rospy.Subscriber('/scara_controller_4dof/state',
                                          JointTrajectoryControllerState,
                                          self.observation_callback_4dof)

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def observation_callback_4dof(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg_4dof = message

    #def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True):
    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=False):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def randomizeTarget(self):
        print("calling randomize target")
        if self.choose_robot is 0:
            print("Randomize target for the 3 DoF")
            EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                        0.3746])  # center of O
            EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                        0.3746])  # center of the H
            EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            EE_POINTS = np.asmatrix([[0, 0, 0]])

            ee_pos_tgt_1 = EE_POS_TGT_1
            ee_pos_tgt_2 = EE_POS_TGT_2

            # leave rotation target same since in scara we do not have rotation of the end-effector
            ee_rot_tgt = EE_ROT_TGT

            # Initialize target end effector position
            target1 = np.ndarray.flatten(
                get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
            target2 = np.ndarray.flatten(
                get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
            """
            This is for initial test only, we need to change this in the future to be more realistic.
            E.g. covered target -> go to other target. This could be implemented for example with vision.
            """
            self.realgoal = target1 if np.random.uniform() < 0.5 else target2
            print("randomizeTarget realgoal: ", self.realgoal)
        else:
            print("Randomize target for the 4 DoF")
            EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                        0.4868])  # center of O
            EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                        0.4868])  # center of the H
            EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            EE_POINTS = np.asmatrix([[0, 0, 0]])

            ee_pos_tgt_1 = EE_POS_TGT_1
            ee_pos_tgt_2 = EE_POS_TGT_2

            # leave rotation target same since in scara we do not have rotation of the end-effector
            ee_rot_tgt = EE_ROT_TGT

            target1 = np.ndarray.flatten(
                get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
            target2 = np.ndarray.flatten(
                get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
            """
            This is for initial test only, we need to change this in the future to be more realistic.
            E.g. covered target -> go to other target. This could be implemented for example with vision.
            """
            self.realgoal = target1 if np.random.uniform() < 0.5 else target2
            print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeRobot(self):
        print("calling randomize correct")
        #try to choose environment
        if np.random.uniform() < 0.5:
            self.choose_robot = 0
            print("which robot is? ", self.choose_robot)
            enviroment_test = self.init_3dof_robot()
        else:
            self.choose_robot = 1
            print("which robot is?", self.choose_robot)
            enviroment_test = self.init_4dof_robot()

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # print('joint messages: ', message.joint_names)
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False
        if self.choose_robot is 0:
            obs_message = self._observation_msg
        else:
            obs_message = self._observation_msg_4dof
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            if self.choose_robot is 0:
                last_observations = np.insert(last_observations, 3, 0.)
                # print('last_observations_extension: ', last_observations)
                # last_observations.append(0.0)
            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)]) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() +
                    3)])  # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005) or (self.iterator >
                                                       self.max_episode_steps)

        # Execute "action"
        if self.choose_robot is 0:
            self._pub_3dof.publish(
                self.get_trajectory_message(
                    action[:self.scara_chain.getNrOfJoints()]))
        else:
            self._pub_4dof.publish(
                self.get_trajectory_message(
                    action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        ee_point = self.ob[self.scara_chain.getNrOfJoints():(
            self.scara_chain.getNrOfJoints() + 3)] + self.realgoal
        ee_point_eucledian = np.linalg.norm(
            self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)])
        return self.ob, self.reward, done, {}, ee_point, ee_point_eucledian

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        self.iterator = 0

        if self.reset_jnts is True:
            if self.choose_robot is 0:
                self._pub_3dof.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
            elif self.choose_robot is 1:
                self._pub_4dof.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))

            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 10
0
class Box3DOFv1Env(gazebo_env.GazeboEnv):
    def __init__(self):

        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self._camera_image = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746])
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0, 0, 0])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.
        # # Set the number of seconds per step of a sample.
        # TIMESTEP = 0.01  # Typically 0.01.
        # # Set the number of timesteps per sample.
        # STEP_COUNT = 100  # Typically 100.
        # # Set the number of samples per condition.
        # SAMPLE_COUNT = 5  # Typically 5.
        # # Set the number of trajectory iterations to collect.
        # ITERATIONS = 20  # Typically 10.
        # How much time does it take to execute the trajectory (in seconds)
        slowness = 2

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [
            BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1,
            SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
            SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2,
            SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3,
            SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        #############################
        rospack = rospkg.RosPack()
        prefix_path = rospack.get_path('scara_e1_description')

        if (prefix_path == None):
            print("I can't find scara_e1_description")
            sys.exit(0)

        URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.environment = {
            # 'dt': TIMESTEP,
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image,
                                           self.camera_callback)

        #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback)

        # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions
        # rclpy.init(args=None)
        # self.ros2_node = rclpy.create_node('robot_ai_node')
        # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER)
        # # self._callbacks = partial(self.observation_callback, robot_id=0)
        # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data)
        # # self._time_lock = threading.RLock()

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = 9  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                            SpawnModel)
        # Seed the environment
        self._seed()

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message
        # print("!!!!!!!!!Received an observation")

    def camera_callback(self, message):
        """
        Callback method for the subscriber of Image
        """
        self._camera_image = message
        print("\ncamera_image.height", self._camera_image.height)
        print("\ncamera_image.width", self._camera_image.width)

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        target.time_from_start.secs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(3)
        # Initialize a joint array for the present 6 joint angles.
        angles = JntArray(3)
        # Construct the joint array from the most recent joint angles.
        for i in range(3):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, 3)
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def addObstacle(self):
        print("\nADD OBSTACLE")
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        try:
            # #NORMAL BOX
            model_xml = "<?xml version=\"1.0\"?> \
                                <robot name=\"myfirst\"> \
                                <link name=\"world\"> \
                                </link>\
                                <link name=\"cylinder0\">\
                                <visual>\
                                    <geometry>\
                                        <box size=\".05 .05 .05\"/>\
                                        </geometry>\
                                    <origin xyz=\"0 0 0\"/>\
                                    <material name=\"red\">\
                                        <ambient>0.5 0.5 1.0 0.1</ambient>\
                                        <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                                    </material>\
                                </visual>\
                                <inertial>\
                                    <mass value=\"10.\"/>\
                                    <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\
                                </inertial>\
                                <collision>\
                                    <geometry>\
                                        <box size=\".05 .05 .05\"/>\
                                    </geometry>\
                                </collision>\
                                </link>\
                                <joint name=\"world_to_base\" type=\"fixed\"> \
                                    <origin xyz=\"0 0 0.3\" rpy=\"0 0 0\"/>\
                                    <parent link=\"world\"/>\
                                    <child link=\"cylinder0\"/>\
                                </joint>\
                                <gazebo reference=\"cylinder0\">\
                                    <material>Gazebo/Red</material>\
                                </gazebo>\
                        </robot>"

            robot_namespace = ""
            pose = Pose()
            number = random.randint(0, 4)
            #pose.position.x = 0.15;#
            pose.position.x = 0.3
            #
            pose.position.y = 0.07
            #
            pose.position.z = 0.05
            pose.orientation.x = 0
            pose.orientation.y = 0
            pose.orientation.z = 0
            pose.orientation.w = 0

            reference_frame = ""

            self.add_model(model_name="obstacle",
                           model_xml=model_xml,
                           robot_namespace=robot_namespace,
                           initial_pose=pose,
                           reference_frame="")
        except (rospy.ServiceException) as e:
            print("/gazebo/spawn_urdf_model service call failed")

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        # # ROS 2, Acquire the lock to prevent the subscriber thread from
        # # updating times or observation messages.
        # self._time_lock.acquire(True)
        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:3],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - observation
            - reward
            - done (status)
            - dictionary (#TODO clarify)
        """
        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Execute "action"
        # if rclpy.ok(): # ROS 2 code
        self._pub.publish(self.get_trajectory_message(action[:3]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Pause simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            #resp_pause = pause.call()
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # # Old implementation for reward calculation
        # # Calculate reward based on observation
        # if np.linalg.norm(self.ob[1]) < 0.005:
        #     self.reward_dist = 1000.0 * np.linalg.norm(self.ob[1])#- 10.0 * np.linalg.norm(ee_points)
        #     # self.reward_ctrl = np.linalg.norm(action)#np.square(action).sum()
        #     done = True
        #     print("self.reward_dist: ", self.reward_dist, "self.reward_ctrl: ", self.reward_ctrl)
        # else:
        #     self.reward_dist = - np.linalg.norm(self.ob[1])
        #     # self.reward_ctrl = - np.linalg.norm(action)# np.square(action).sum()
        # # self.reward = 2.0 * self.reward_dist + 0.01 * self.reward_ctrl
        # #removed the control reward, maybe we should add it later.
        # self.reward = self.reward_dist

        # Calculate the reward
        #  Heuristic used for the calculation of the reward:
        #   - calculate the residual mean square error (rmse) between the current
        #       end effector point and the target point
        #   - in the case it's bigger than 5 mm, reward is the negative value of the rmse
        #   - in case where it's smaller than 5 mm, reward is calculated by substracting 100 - rmse
        # print(self.ob[2])
        self.reward_dist = -self.rmse_func(self.ob[2])
        if abs(self.reward_dist) < 0.005:
            # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist)
            self.reward = 100 + self.reward_dist
        else:
            # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist)
            self.reward = self.reward_dist

        # print("reward: ", self.reward)

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005)

        # self._time_lock.release() # ROS 2 code

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        # Resets the state of the environment and returns an initial observation.
        rospy.wait_for_service('/gazebo/reset_simulation')
        try:
            #reset_proxy.call()
            self.reset_proxy()
        except (rospy.ServiceException) as e:
            print("/gazebo/reset_simulation service call failed")

        # Unpause simulation
        rospy.wait_for_service('/gazebo/unpause_physics')
        try:
            self.unpause()
        except (rospy.ServiceException) as e:
            print("/gazebo/unpause_physics service call failed")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Pause the simulation
        rospy.wait_for_service('/gazebo/pause_physics')
        try:
            self.pause()
        except (rospy.ServiceException) as e:
            print("/gazebo/pause_physics service call failed")

        # Return the corresponding observation
        return self.ob
Esempio n. 11
0
class GazeboNewArticulatedv1Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "NewArticulatedArm_v1.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([0.2, 0.2, 0.2868])  # 200 cm from the z axis
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'

        # Set constants for links
        BASE = 'new_arm_base_link'

        NEW_ART_MOTOR1 = 'motor1_link'
        NEW_ART_MOTOR2 = 'link1'
        NEW_ART_MOTOR3 = 'motor3_link'
        NEW_ART_MOTOR4 = 'motor4_link'
        NEW_ART_MOTOR5 = 'motor5_link'

        # EE_LINK = 'ee_link'
        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT
        ]
        LINK_NAMES = [
            BASE, NEW_ART_MOTOR1, NEW_ART_MOTOR2, NEW_ART_MOTOR3,
            NEW_ART_MOTOR4, NEW_ART_MOTOR5
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/new_articulated/new_arm.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/new_arm_controller/command',
                                    JointTrajectory)
        self._sub = rospy.Subscriber('/new_arm_controller/state',
                                     JointTrajectoryControllerState,
                                     self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 6  # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # # Gazebo specific services to start/stop its behavior and
        # # facilitate the overall RL environment
        # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)
        # self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        # self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel)
        #
        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = 0.055  #EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                       model_xml=model_xml,
                       robot_namespace="",
                       initial_pose=pose,
                       reference_frame="")

        #self.addObstacle()

        # Seed the environment
        # Seed the environment
        self._seed()

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def randomizeTargetPositions(self):
        """
        The goal is to test with randomized positions which range between the boundries of the H-ROS logo
        """
        print("In randomize target positions.")
        EE_POS_TGT_RANDOM1 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the first half H-ROS letters with +-0.01 offset
        EE_POS_TGT_RANDOM2 = np.asmatrix([
            np.random.uniform(0.2852485, 0.3883636),
            np.random.uniform(-0.1746508, 0.1701576), 0.2868
        ])  # boundry box of the H-ROS letters with +-0.01 offset
        # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1
        ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT
        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T)

        # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeTarget(self):
        print("calling randomize target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.2868])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.2868])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def randomizeMultipleTargets(self):
        print("calling randomize multiple target")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366,
                                    0.2868])  # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121,
                                    0.2868])  # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)
        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1), ]

            return np.r_[np.reshape(last_observations, -1),
                         np.reshape(ee_points, -1),
                         np.reshape(ee_velocities, -1), ]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator += 1
        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases
        #
        # # Calculate if the env has been solved
        # done = bool(abs(self.reward_dist) < 0.005)

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(
                self.scara_chain.getNrOfJoints() + 3)]) < 0.005):
            self.reward = 1 - self.rmse_func(
                self.ob[self.scara_chain.getNrOfJoints():(
                    self.scara_chain.getNrOfJoints() +
                    3)])  # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # print("reward: ", self.reward)
        # print("rmse_func: ", self.rmse_func(ee_points))

        # Calculate if the env has been solved
        done = bool(abs(self.reward_dist) < 0.005) or (self.iterator >
                                                       self.max_episode_steps)

        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Execute "action"
        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))
        #TODO: wait until action gets executed
        # When adding this the algorithm does not converge
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def goToInit(self):
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions'][
                'initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action - now_action, float(np.inf))
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
                time.sleep(3)

    def _reset(self):
        """
        Reset the agent for a particular experiment condition.
        """

        self.iterator = 0

        # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
        # time.sleep(int(self.environment['slowness'])) # using seconds
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds
        # # time.sleep(int(self.environment['slowness']))

        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 12
0
class ARIACPickv0Env(gazebo_env.GazeboEnv):
    def __init__(self):

        self.slowness = 1
        self.slowness_unit = 'sec'
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ARIACPick_v0.launch")

        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'

        # Set constants for joints
        SHOULDER_PAN_JOINT = 'shoulder_pan_joint'
        SHOULDER_LIFT_JOINT = 'shoulder_lift_joint'
        ELBOW_JOINT = 'elbow_joint'
        WRIST_1_JOINT = 'wrist_1_joint'
        WRIST_2_JOINT = 'wrist_2_joint'
        WRIST_3_JOINT = 'wrist_3_joint'

        # Set constants for links
        BASE = 'base'
        BASE_LINK = 'base_link'
        SHOULDER_LINK = 'shoulder_link'
        UPPER_ARM_LINK = 'upper_arm_link'
        FOREARM_LINK = 'forearm_link'
        WRIST_1_LINK = 'wrist_1_link'
        WRIST_2_LINK = 'wrist_2_link'
        WRIST_3_LINK = 'wrist_3_link'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT,
                           WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT]
        # JOINT_ORDER= ['linear_arm_actuator_joint', 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']

        LINK_NAMES = [BASE, BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK,
                      WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK, EE_LINK]

        LINK_NAMES2 = [ BASE,
                        BASE_LINK,
                        SHOULDER_LINK,
                        UPPER_ARM_LINK,
                        FOREARM_LINK,
                        WRIST_1_LINK,
                        WRIST_2_LINK,
                        WRIST_3_LINK,
                        EE_LINK
                      ]
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])

        INITIAL_JOINTS = np.array([0., 0., 0.])
        STEP_COUNT = 2  # Typically 100.

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = "/home/erle/ros_scara/ur10.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        rospy.wait_for_service('/ariac/gripper/control')
        self.vacuum_gripper_control = rospy.ServiceProxy('/ariac/gripper/control', VacuumGripperControl)

        self._pub = rospy.Publisher('/ariac/arm/command', JointTrajectory)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)

        # Seed the environment
        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):

        state = []
        reward = []
        done = False

        data_arm_state = None
        while data_arm_state is None:
            try:
                data_arm_state = rospy.wait_for_message('/ariac/arm/state', JointTrajectoryControllerState, timeout=5)
            except:
                pass

        data_arm_state2 = JointTrajectoryControllerState()
        data_arm_state2.joint_names = data_arm_state.joint_names[1:]
        data_arm_state2.desired.positions  = data_arm_state.desired.positions[1:]
        data_arm_state2.desired.velocities = data_arm_state.desired.velocities[1:]
        data_arm_state2.desired.accelerations = data_arm_state.desired.accelerations[1:]
        data_arm_state2.actual.positions = data_arm_state.actual.positions[1:]
        data_arm_state2.actual.velocities = data_arm_state.actual.velocities[1:]
        data_arm_state2.actual.accelerations = data_arm_state.actual.accelerations[1:]
        data_arm_state2.error.positions = data_arm_state.error.positions[1:]
        data_arm_state2.error.velocities = data_arm_state.error.velocities[1:]
        data_arm_state2.error.accelerations = data_arm_state.error.accelerations[1:]
        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(data_arm_state2, self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(self.scara_chain,
                                        self.environment['link_names'],
                                        last_observations[:self.scara_chain.getNrOfJoints()],
                                        base_link=self.environment['link_names'][0],
                                        end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
                                                              trans,
                                                              rot).T)
            ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
                                                                   self.environment['end_effector_points'],
                                                                   rot)
            ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
                                                           self.environment['end_effector_points'],
                                                           rot,
                                                           last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

        self._pub.publish(self.get_trajectory_message([0.018231524968342513,
                                                       3.2196073949389703 + np.random.rand(1)[0] - .5,
                                                      -0.6542426695478509 + np.random.rand(1)[0] - .5,
                                                       1.7401498071871018 + np.random.rand(1)[0] - .5,
                                                       3.7354939354077596 + np.random.rand(1)[0] - .5,
                                                      -1.510707301072792 + np.random.rand(1)[0] - .5,
                                                       0.00014565660628829136 + np.random.rand(1)[0] - .5]))

        data_vacuum_state = None
        while data_vacuum_state is None:
            try:
                data_vacuum_state = rospy.wait_for_message('/ariac/gripper/state', VacuumGripperState, timeout=5)
            except:
                pass

        data_camera = None
        while data_camera is None:
            try:
                data_camera = rospy.wait_for_message('/ariac/logical_camera_1', LogicalCameraImage, timeout=5)
            except:
                pass

        try:
            self.vacuum_gripper_control(enable=bool(state[0]))
        except (rospy.ServiceException) as e:
            print ("/gazebo/reset_simulation service call failed")

        state = [data_arm_state, data_camera, data_vacuum_state]


        return state, reward, done, {}

    def reset(self):

        data_arm_state = None
        while data_arm_state is None:
            try:
                data_arm_state = rospy.wait_for_message('/ariac/arm/state', JointTrajectoryControllerState, timeout=5)
            except:
                pass
        data_camera = None
        while data_camera is None:
            try:
                data_camera = rospy.wait_for_message('/ariac/logical_camera_1', LogicalCameraImage, timeout=5)
            except:
                pass

        state = [data_arm_state, data_camera]

        return state


    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names =  copy.deepcopy(self.environment['joint_order'])
        action_msg.joint_names.insert(0, 'linear_arm_actuator_joint')
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty");
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(map(lambda x,y: x in y, message.joint_names,
                    [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(message.actual.positions) # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3),
                                                       ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)
class GazeboModularScara3DOFStaticObstaclev0Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000 # limit the max episode step
        # class variable that iterates to accounts for number of steps per episode
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        INITIAL_JOINTS = np.array([0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        STEP_COUNT = 2  # Typically 100.

        # make sure that the slowness is set properly!! In case we forget them defaults to seconds.
        # if self.slowness is None:
        #     slowness = 1
        #
        # else:
        #     slowness = self.slowness



        # slowness = 100000000 # 1 is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/scara_controller/command'
        JOINT_SUBSCRIBER = '/scara_controller/state'
        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        # Set constants for links
        WORLD = "world"
        BASE = 'scara_e1_base_link'
        BASE_MOTOR = 'scara_e1_base_motor'
        #
        SCARA_MOTOR1 = 'scara_e1_motor1'
        SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside'
        SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support'
        SCARA_BAR_MOTOR1 = 'scara_e1_bar1'
        SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1'
        #
        SCARA_MOTOR2 = 'scara_e1_motor2'
        SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside'
        SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support'
        SCARA_BAR_MOTOR2 = 'scara_e1_bar2'
        SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2'
        #
        SCARA_MOTOR3 = 'scara_e1_motor3'
        SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside'
        SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support'
        SCARA_BAR_MOTOR3 = 'scara_e1_bar3'
        SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3'
        #
        SCARA_RANGEFINDER = 'scara_e1_rangefinder'
        EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT]
        LINK_NAMES = [BASE, BASE_MOTOR,
                      SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1,
                      SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2,
                      SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3,
                      EE_LINK]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        #URDF_PATH = "/home/erle/scara_e1_description/urdf/scara_e1_3joints.urdf"
        URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT
        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        self.realgoal = ee_tgt

        self.environment = {
            'T': STEP_COUNT,
            'ee_points_tgt': ee_tgt,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
            # 'num_samples': SAMPLE_COUNT,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory)
        self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling _step()
        # # taken from mujoco in OpenAi how to initialize observation space and action space.
        # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints()))
        # assert not done
        # self.obs_dim = observation.size
        self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        # self.action_space = spaces.Discrete(3) #F,L,R
        # self.reward_range = (-np.inf, np.inf)

        # Gazebo specific services to start/stop its behavior and
        # facilitate the overall RL environment
        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)
        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel)

        model_xml = "<?xml version=\"1.0\"?> \
                    <robot name=\"myfirst\"> \
                      <link name=\"world\"> \
                      </link>\
                      <link name=\"cylinder0\">\
                        <visual>\
                          <geometry>\
                            <sphere radius=\"0.01\"/>\
                          </geometry>\
                          <origin xyz=\"0 0 0\"/>\
                          <material name=\"rojotransparente\">\
                              <ambient>0.5 0.5 1.0 0.1</ambient>\
                              <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                          </material>\
                        </visual>\
                        <inertial>\
                          <mass value=\"5.0\"/>\
                          <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                        </inertial>\
                      </link>\
                      <joint name=\"world_to_base\" type=\"fixed\"> \
                        <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                        <parent link=\"world\"/>\
                        <child link=\"cylinder0\"/>\
                      </joint>\
                      <gazebo reference=\"cylinder0\">\
                        <material>Gazebo/GreenTransparent</material>\
                      </gazebo>\
                    </robot>"
        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0,0];
        pose.position.y = EE_POS_TGT[0,1];
        pose.position.z = 0.055#EE_POS_TGT[0,2];

        #Static obstacle (not in original code)
        # pose.position.x = 0.25;#
        # pose.position.y = 0.07;#
        # pose.position.z = 0.0;#

        pose.orientation.x = 0;
        pose.orientation.y= 0;
        pose.orientation.z = 0;
        pose.orientation.w = 0;
        reference_frame = ""
        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        self.add_model(model_name="target",
                        model_xml=model_xml,
                        robot_namespace="",
                        initial_pose=pose,
                        reference_frame="")

        #self.addObstacle()

        # Seed the environment
        self._seed()

        # self.max_steps_episode = 1000
    def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=False):
            self.slowness = slowness
            self.slowness_unit = slowness_unit
            print("slowness: ", self.slowness)
            print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit))


    def randomizeObstacle(self):
        print("calling randomize obstacle")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        #self.realgoal = target1 if np.random.uniform() < 0.5 else target2


        if np.random.uniform() < 0.5:
             self.addObstacle()
             self.realgoal = target2
             print("\n\nOBSTACLE\n\n")
        else:
             self.removeObstacle()
             self.realgoal = target1
             print("\n\nNO OBSTACLE\n\n")

        print("randomizeObstacle realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H


    #def randomizeCorrect(self):
    def randomizeTarget(self):
        print("calling randomize correct")

        EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O
        EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H
        EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])

        ee_pos_tgt_1 = EE_POS_TGT_1
        ee_pos_tgt_2 = EE_POS_TGT_2

        # leave rotation target same since in scara we do not have rotation of the end-effector
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

        target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T)
        target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T)

        """
        This is for initial test only, we need to change this in the future to be more realistic.
        E.g. covered target -> go to other target. This could be implemented for example with vision.
        """
        self.realgoal = target1 if np.random.uniform() < 0.5 else target2
        print("randomizeTarget realgoal: ", self.realgoal)
        #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])])
        # 0 = obstacle. 1 = no obstacle.
        # self.realgoal = 0
        # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O
        # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H


    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg =  message

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")
        # target.time_from_start.nsecs = self.environment['slowness']
        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty");
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(map(lambda x,y: x in y, message.joint_names,
                    [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(message.actual.positions) # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3),
                                                       ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message, self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(self.scara_chain,
                                        self.environment['link_names'],
                                        last_observations[:self.scara_chain.getNrOfJoints()],
                                        base_link=self.environment['link_names'][0],
                                        end_link=self.environment['link_names'][-1])
            # #
            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # #
            # current_quaternion = np.array([angle]+dir.tolist())#

            # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
                                                              trans,
                                                              rot).T)
            ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
                                                                   self.environment['end_effector_points'],
                                                                   rot)
            ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
                                                           self.environment['end_effector_points'],
                                                           rot,
                                                           last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

            return np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(ee_velocities, -1),]

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator+=1
        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        # self.ob = take_observation()

        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases
        #
        # # Calculate if the env has been solved
        # done = bool(abs(self.reward_dist) < 0.005)

        # reward as in the enviroment for 4DOF commented out
        self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
            self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        # print("reward: ", self.reward)
        # print("rmse_func: ", self.rmse_func(ee_points))

        # # enviroment V2 reward
        # self.reward_dist = self.rmse_func(self.ob[3:6])
        # # print("reward_dist :", self.reward_dist)
        # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases

        # Calculate if the env has been solved
        done = (bool(abs(self.reward_dist) < 0.005)) or (self.iterator > self.max_episode_steps)

        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Execute "action"
        # if rclpy.ok(): # ROS 2 code
        self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))

        #TODO: wait until action gets executed
        # When adding this the algorithm does not converge
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     #resp_pause = pause.call()
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()
        # print("in step, action: ", action[:3])
        # print("in step, observation: ", self.ob[:3])

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        return self.ob, self.reward, done, {}

    def addObstacle(self):
            rospy.wait_for_service('/gazebo/spawn_urdf_model')
                                #   <material name=\"green\">\
                                #     <color rgba=\"0 0.8 .8 1\"/>\
                                #   </material>\

            try:
                model_xml = "<?xml version=\"1.0\"?> \
                            <robot name=\"myfirst\"> \
                              <link name=\"world\"> \
                              </link>\
                              <link name=\"cylinder0\">\
                                <visual>\
                                  <geometry>\
                                    <box size=\".04 .04 1\"/>\
                                  </geometry>\
                                  <origin xyz=\"0 0 0\"/>\
                                  <material name=\"blue\">\
                                      <ambient>0.5 0.5 1.0 0.1</ambient>\
                                      <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                                  </material>\
                                </visual>\
                                <inertial>\
                                  <mass value=\"5.0\"/>\
                                  <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                                </inertial>\
                                <collision>\
                                  <geometry>\
                                    <box size=\".04 .04 1\"/>\
                                  </geometry>\
                                  <contact>\
                                    <ode>\
                                        <soft_cfm>0.000000</soft_cfm>\
                                        <soft_erp>0.200000</soft_erp>\
                                        <kp>1000000000000.000000</kp>\
                                        <kd>1.000000</kd>\
                                        <max_vel>100.000000</max_vel>\
                                        <min_depth>0.00001000</min_depth>\
                                    </ode>\
                                </contact>\
                               </collision>\
                              </link>\
                              <joint name=\"world_to_base\" type=\"fixed\"> \
                                <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\
                                <parent link=\"world\"/>\
                                <child link=\"cylinder0\"/>\
                              </joint>\
                              <gazebo reference=\"cylinder0\">\
                                <material>Gazebo/Blue</material>\
                              </gazebo>\
                            </robot>"

                    #BOX
                # model_xml = "<?xml version=\"1.0\"?> \
                #              <robot name=\"myfirst\"> \
                #                   <link name=\"world\"> \
                #                   </link>\
                #                   <link name=\"box0\">\
                #                     <visual>\
                #                       <geometry>\
                #                         <box>\
                #                             <size>0.2 0.2 1.0</size>\
                #                         </box>\
                #                       </geometry>\
                #                       <origin xyz=\"0 0 0\"/>\
                #                       <material name=\"blue\">\
                #                           <ambient>0.5 0.5 1.0 0.1</ambient>\
                #                           <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                #                       </material>\
                #                     </visual>\
                #                     <inertial>\
                #                       <mass value=\"5.0\"/>\
                #                       <inertia>\
                #                         <ixx>1.0</ixx>\
                #                         <ixy>0</ixy>\
                #                         <ixz>0</ixz>\
                #                         <iyy>1.0</iyy>\
                #                         <iyz>0</iyz>\
                #                         <izz>1.0</izz>\
                #                       </inertia>\
                #                     </inertial>\
                #                     <collision>\
                #                       <geometry>\
                #                         <box>\
                #                             <size>0.2 0.2 1.0</size>\
                #                         </box>\
                #                       </geometry>\
                #                     </collision>\
                #                   </link>\
                #                   <joint name=\"world_to_base\" type=\"fixed\"> \
                #                     <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\
                #                     <parent link=\"world\"/>\
                #                     <child link=\"box0\"/>\
                #                   </joint>\
                #                   <gazebo reference=\"box0\">\
                #                     <material>Gazebo/Blue</material>\
                #                   </gazebo>\
                #                 </robot>"

                # model_xml = "<?xml version=\"1.0\"?> \
                #             <robot name=\"myfirst\"> \
                #               <link name=\"world\"> \
                #               </link>\
                #               <link name=\"cylinder0\">\
                #                 <visual>\
                #                   <geometry>\
                #                     <sphere radius=\"0.01\"/>\
                #                   </geometry>\
                #                   <origin xyz=\"0 0 0\"/>\
                #                   <material name=\"rojotransparente\">\
                #                       <ambient>0.5 0.5 1.0 0.1</ambient>\
                #                       <diffuse>0.5 0.5 1.0 0.1</diffuse>\
                #                   </material>\
                #                 </visual>\
                #                 <inertial>\
                #                   <mass value=\"5.0\"/>\
                #                   <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\
                #                 </inertial>\
                #               </link>\
                #               <joint name=\"world_to_base\" type=\"fixed\"> \
                #                 <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\
                #                 <parent link=\"world\"/>\
                #                 <child link=\"cylinder0\"/>\
                #               </joint>\
                #               <gazebo reference=\"cylinder0\">\
                #                 <material>Gazebo/GreenTransparent</material>\
                #               </gazebo>\
                #             </robot>"

                robot_namespace = ""
                pose = Pose()
                number = random.randint(0, 4)
                # if number == 1:
                #     pose.position.x = 0.25;
                #     pose.position.y = 0.07;
                # if number == 2:
                #     pose.position.x = 0.25;
                #     pose.position.y = -0.07;
                # if number == 3:
                #     pose.position.x = 0.23;
                #     pose.position.y = -0.087;
                # if number == 4:
                #     pose.position.x = 0.23;
                #     pose.position.y = 0.087;

                pose.position.x = 0.25;#
                pose.position.y = 0.07;#

                pose.position.z = 0.0;
                pose.orientation.x = 0;
                pose.orientation.y= 0;
                pose.orientation.z = 0;
                pose.orientation.w = 0;
                reference_frame = ""
                if number > -1:
                  self.add_model(model_name="obstacle",
                                model_xml=model_xml,
                                robot_namespace=robot_namespace,
                                initial_pose=pose,
                                reference_frame="")
            except (rospy.ServiceException) as e:
                print ("/gazebo/spawn_urdf_model service call failed")

    def removeObstacle(self):
            rospy.wait_for_service('/gazebo/delete_model')
            try:
                self.remove_model(model_name="obstacle")
            except (rospy.ServiceException) as e:
                print ("/gazebo/spawn_urdf_model service call failed")

    def _reset(self):
        # """
        # Reset the agent for a particular experiment condition.
        # """
        # # Resets the state of the environment and returns an initial observation.
        # rospy.wait_for_service('/gazebo/reset_simulation')
        # try:
        #     #reset_proxy.call()
        #     self.reset_proxy()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/reset_simulation service call failed")
        #.,
        # # Unpause simulation
        # rospy.wait_for_service('/gazebo/unpause_physics')
        # try:
        #     self.unpause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/unpause_physics service call failed")

        # Go to initial position and wait until it arrives there
        # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
        # time.sleep(int(self.environment['slowness']))
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds

        # # Pause the simulation
        # rospy.wait_for_service('/gazebo/pause_physics')
        # try:
        #     self.pause()
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/pause_physics service call failed")

        # try:
        #     self.remove_model(model_name="obstacle")
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/spawn_urdf_model service call failed")




        #remove previous spawned model in order to avoid respawning model with same name
        # rospy.wait_for_service('/gazebo/delete_model')
        # try:
        #     self.remove_model(model_name="obstacle")
        # except (rospy.ServiceException) as e:
        #     print ("/gazebo/spawn_urdf_model service call failed")
        #remove previous spawned model in order to avoid respawning model with same name



        self.iterator = 0
        msg = self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])
        msg.points[0].time_from_start.secs = 0
        msg.points[0].time_from_start.nsecs = 10000
        self._pub.publish(msg)

        # time.sleep(int(self.environment['slowness'])) # using seconds
        # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            time.sleep(int(self.slowness))
        elif(self.slowness_unit == 'nsec'):
            time.sleep(int(self.slowness/1000000000)) # using nanoseconds
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
Esempio n. 14
0
class RealModularMara3DOFv0Env(real_env.RealEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        real_env.RealEnv.__init__(self)

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000 # now used in all algorithms
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self.detect_target_once = 1

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach
        EE_POS_TGT = np.asmatrix([-0.48731417, -0.00258965,  0.82467501])

        EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238,  0.31536287], [0.51531424,  0.85207952,  0.09171542], [-0.31601302,  0.08944959,  0.94452874]])
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        # INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'

        # joint names:
        MOTOR1_JOINT = 'hros_actuation_servomotor_000A3500014E2'
        MOTOR2_JOINT = 'hros_actuation_servomotor_000A3500014E'
        MOTOR3_JOINT = 'hros_actuation_servomotor_000A3500018E'
        MOTOR4_JOINT = 'hros_actuation_servomotor_000A3500018E2'
        MOTOR5_JOINT = 'hros_actuation_servomotor_000A3500016E'
        MOTOR6_JOINT = 'hros_actuation_servomotor_000A3500016E2'

        # Set constants for links
        TABLE = 'table'

        BASE = 'base_link'

        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'


        # EE_LINK = 'ee_link'
        JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT,
                       MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT]
        LINK_NAMES = [TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK,
                            MARA_MOTOR3_LINK, MARA_MOTOR4_LINK,
                            MARA_MOTOR5_LINK, MARA_MOTOR6_LINK,
                      EE_LINK]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
             'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = '/home/erle/ros_catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf'

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt
        self.target_orientation = ee_rot_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'ee_point_tgt_orient': self.target_orientation,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory)
        self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback)

        TARGET_SUBSCRIBER = '/mara/target'
        self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose, self.tgt_callback)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        recently also added quaternion to the obs, which has dimension=4
        """

        self.obs_dim = self.scara_chain.getNrOfJoints() + 10 #6 hardcode it for now

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints())

        self.action_space = spaces.Box(low, high)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel)
        self.pub_set_model = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=1)

        # Seed the environment
        # Seed the environment
        self._seed()
        self.kf = []
        for i in range(6):
            # self.kf.append(KalmanFilter(initial_state_mean=0,
            #                             transition_matrices=[1],
            #                             observation_matrices=[1],
            #                             initial_state_covariance=1,
            #                             observation_covariance=5,
            #                             transition_covariance=0.001,
            #                             n_dim_obs=1, n_dim_state=1,
            #                             em_vars=['transition_covariance', 'observation_covariance', 'initial_state_covariance']))
            # self.kf.append(KalmanFilter(initial_state_mean=0,
            #                             transition_matrices=[1],
            #                             observation_matrices=[1],
            #                             initial_state_covariance=1,
            #                             observation_covariance=1,
            #                             transition_covariance=0.1,
            #                             n_dim_obs=1, n_dim_state=1))

            self.kf.append(RingBuffer(500))
    def tgt_callback(self,msg):
        # print("Whats the target?: ", msg)
        if self.detect_target_once is 1:
            print("Get the target from vision, for now just use position.")
            EE_POS_TGT = np.asmatrix([msg.position.x, msg.position.y, msg.position.z])
            EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            EE_POINTS = np.asmatrix([[0, 0, 0]])
            ee_pos_tgt = EE_POS_TGT

            # leave rotation target same since in scara we do not have rotation of the end-effector
            ee_rot_tgt = EE_ROT_TGT
            target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)

            # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T)

            self.realgoal = target1
            print("Predicted target is: ", self.realgoal)
            self.detect_target_once = 0

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        # index = 0
        # for value in message.actual.positions:
        #      message.actual.positions[index] = value * 3.14/180
        #      index = index + 1
        self._observation_msg =  message

    def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True):
            self.slowness = slowness
            self.slowness_unit = slowness_unit
            self.reset_jnts = reset_jnts
            print("slowness: ", self.slowness)
            print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit))
            print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts))

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]

        index = 0
        # print(self.kf)
        print(action_float)
        for value in action_float:
            # # self.kf[index] = self.kf[index].em(value, n_iter=5)
            # (filtered_state_means, filtered_state_covariances) = self.kf[index].filter(value)
            # (smoothed_state_means, smoothed_state_covariances) = self.kf[index].smooth(value)
            # action_float[index] = smoothed_state_means
            self.kf[index].append(value)
            action_float[index] = self.kf[index].mean()
            index = index + 1
        print(action_float)
        print('---------------------')

        target.positions = action_float
        target.velocities = [0]*6
        target.effort = [float('nan')]*6
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty");
            # return None
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(map(lambda x,y: x in y, message.joint_names,
                    [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(message.actual.positions) # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3),
                                                       ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        # Take an observation
        # done = False

        obs_message = self._observation_msg
        if obs_message is None:
            # print("last_observations is empty")
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message, self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            # print(self.environment['link_names'][-1])
            trans, rot = forward_kinematics(self.scara_chain,
                                        self.environment['link_names'],
                                        last_observations[:self.scara_chain.getNrOfJoints()],
                                        base_link=self.environment['link_names'][0],
                                        end_link=self.environment['link_names'][-1])

            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans

            # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here
            current_quaternion = quaternion_from_matrix(rotation_matrix)
            tgt_quartenion = quaternion_from_matrix(self.target_orientation)

            A  = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T

            quat_error = current_quaternion - tgt_quartenion

            current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'],
                                                              trans,
                                                              rot).T)
            ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians,
                                                                   self.environment['end_effector_points'],
                                                                   rot)
            ee_velocities = self.get_ee_points_velocities(ee_link_jacobians,
                                                           self.environment['end_effector_points'],
                                                           rot,
                                                           last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(quat_error, -1),
                          np.reshape(ee_velocities, -1),]

            return state

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - reward
            - done (status)
            - action
            - observation
            - dictionary (#TODO clarify)
        """
        self.iterator+=1

        self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])
        self.reward_orient = - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])

        #scale here the orientation because it should not be the main bias of the reward, position should be
        orientation_scale = 0.01

        # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
        if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005):
            self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases
            print("Reward is: ", self.reward)
        else:
            self.reward = self.reward_dist

        if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])<0.05):
            self.reward = self.reward +  orientation_scale * (1 -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]))
            print("Reward orientation is: ", self.reward)
        else:
            self.reward = self.reward + orientation_scale * self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])

        # Calculate if the env has been solved
        done = bool(((abs(self.reward_dist) < 0.005) and (abs(self.reward_orient)) < 0.05) or (self.iterator>self.max_episode_steps))

        # Execute "action"
        self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return

        return self.ob, self.reward, done, {}

    def publish_last_position(self, values):
        self._pub.publish(self.get_trajectory_message(values[:self.scara_chain.getNrOfJoints()]))

    def goToInit(self):
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions']['initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action-now_action, float(np.inf))
            self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
                time.sleep(3)

    def reset(self):
        """
        Reset the agent for a particular experiment condition.
        """

        self.iterator = 0

        if self.reset_jnts is True:
            self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']))
            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif(self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness/1000000000)) # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while(self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob
class GazeboMARATopOrientCollisionv0Env(gazebo_env.GazeboEnv):
    """
    This environment present a modular SCARA robot with a range finder at its
    end pointing towards the workspace of the robot. The goal of this environment is
    defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace.
    This environment uses `slowness=1` and matches the delay between actions/observations
    to this value (slowness). In other words, actions are taken at "1/slowness" rate.

    Reward is determined ... (TODO: describe the heuristic or reward calculation method)
    """
    def __init__(self):
        """
        Initialize the SCARA environemnt
            NOTE: This environment uses ROS and interfaces.

            TODO: port everything to ROS 2 natively
        """
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch")

        # TODO: cleanup this variables, remove the ones that aren't used
        # class variables
        self._observation_msg = None
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None
        self.obs = None
        self.reward = None
        self.done = None
        self.reward_dist = None
        self.reward_ctrl = None
        self.action_space = None
        self.max_episode_steps = 1000  # now used in all algorithms
        self.iterator = 0
        # default to seconds
        self.slowness = 1
        self.slowness_unit = 'sec'
        self.reset_jnts = True
        self._collision_msg = None

        self._time_lock = threading.RLock()

        #############################
        #   Environment hyperparams
        #############################
        # target, where should the agent reach

        EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466])  # alex2
        # EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar
        # EE_ROT_TGT = np.asmatrix([[-0.00128296,  0.9999805 ,  0.00611158],
        #                            [ 0.00231397, -0.0061086 ,  0.99997867],
        #                            [ 0.9999965 ,  0.00129708, -0.00230609]])
        # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis
        # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981])
        # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis

        # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H
        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])

        # EE_ROT_TGT = np.asmatrix([[-0.99521107,  0.09689605, -0.01288708],
        #                           [-0.09768035, -0.99077857,  0.09389558],
        #                           [-0.00367013,  0.09470474,  0.99549864]])
        # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287],
                                  [0.51531424, 0.85207952, 0.09171542],
                                  [-0.31601302, 0.08944959,
                                   0.94452874]])  # original orientation
        EE_POINTS = np.asmatrix([[0, 0, 0]])
        EE_VELOCITIES = np.asmatrix([[0, 0, 0]])
        # Initial joint position
        # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.])
        INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.])
        # Used to initialize the robot, #TODO, clarify this more
        # STEP_COUNT = 2  # Typically 100.
        # slowness = 10000000 # 10 ms, where 1 second is real life simulation
        # slowness = 1000000 # 1 ms, where 1 second is real life simulation
        # slowness = 1 # use >10 for running trained network in the simulation
        # slowness = 10 # use >10 for running trained network in the simulation

        # Topics for the robot publisher and subscriber.
        JOINT_PUBLISHER = '/mara_controller/command'
        JOINT_SUBSCRIBER = '/mara_controller/state'
        RAND_LIGHT_PUBLISHER = '/randomizers/randomizer/light'
        RAND_SKY_PUBLISHER = '/randomizers/randomizer/sky'
        RAND_PHYSICS_PUBLISHER = '/randomizers/randomizer/physics'
        LINK_STATE_PUBLISHER = '/gazebo/set_link_state'
        RAND_OBSTACLES_PUBLISHER = '/randomizers/randomizer/obstacles'

        # joint names:
        MOTOR1_JOINT = 'motor1'
        MOTOR2_JOINT = 'motor2'
        MOTOR3_JOINT = 'motor3'
        MOTOR4_JOINT = 'motor4'
        MOTOR5_JOINT = 'motor5'
        MOTOR6_JOINT = 'motor6'

        # Set constants for links
        TABLE = 'table'

        BASE = 'base_link'

        MARA_MOTOR1_LINK = 'motor1_link'
        MARA_MOTOR2_LINK = 'motor2_link'
        MARA_MOTOR3_LINK = 'motor3_link'
        MARA_MOTOR4_LINK = 'motor4_link'
        MARA_MOTOR5_LINK = 'motor5_link'
        MARA_MOTOR6_LINK = 'motor6_link'
        EE_LINK = 'ee_link'

        # EE_LINK = 'ee_link'
        JOINT_ORDER = [
            MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT,
            MOTOR5_JOINT, MOTOR6_JOINT
        ]
        LINK_NAMES = [
            TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK,
            MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK
        ]

        reset_condition = {
            'initial_positions': INITIAL_JOINTS,
            'initial_velocities': []
        }
        #############################

        # TODO: fix this and make it relative
        # Set the path of the corresponding URDF file from "assets"
        URDF_PATH = rospkg.RosPack().get_path(
            "mara_description") + "/urdf/mara_demo_camera_top.urdf"

        m_joint_order = copy.deepcopy(JOINT_ORDER)
        m_link_names = copy.deepcopy(LINK_NAMES)
        m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER)
        m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER)
        ee_pos_tgt = EE_POS_TGT
        ee_rot_tgt = EE_ROT_TGT

        # Initialize target end effector position
        ee_tgt = np.ndarray.flatten(
            get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)
        self.realgoal = ee_tgt
        self.target_orientation = ee_rot_tgt

        self.environment = {
            # rk changed this to for the mlsh
            # 'ee_points_tgt': ee_tgt,
            'ee_points_tgt': self.realgoal,
            'ee_point_tgt_orient': self.target_orientation,
            'joint_order': m_joint_order,
            'link_names': m_link_names,
            # 'slowness': slowness,
            'reset_conditions': reset_condition,
            'tree_path': URDF_PATH,
            'joint_publisher': m_joint_publishers,
            'joint_subscriber': m_joint_subscribers,
            'end_effector_points': EE_POINTS,
            'end_effector_velocities': EE_VELOCITIES,
        }

        # self.spec = {'timestep_limit': 5, 'reward_threshold':  950.0,}

        # Subscribe to the appropriate topics, taking into account the particular robot
        # ROS 1 implementation
        self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory)
        self._sub = rospy.Subscriber(JOINT_SUBSCRIBER,
                                     JointTrajectoryControllerState,
                                     self.observation_callback)
        self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState,
                                          self.collision_callback)

        self._pub_rand_light = rospy.Publisher(RAND_LIGHT_PUBLISHER, stdEmpty)
        self._pub_rand_sky = rospy.Publisher(RAND_SKY_PUBLISHER, stdEmpty)
        self._pub_rand_physics = rospy.Publisher(RAND_PHYSICS_PUBLISHER,
                                                 stdEmpty)
        self._pub_rand_obstacles = rospy.Publisher(RAND_OBSTACLES_PUBLISHER,
                                                   stdEmpty)
        self._pub_link_state = rospy.Publisher(LINK_STATE_PUBLISHER, LinkState)

        # Initialize a tree structure from the robot urdf.
        #   note that the xacro of the urdf is updated by hand.
        # The urdf must be compiled.
        _, self.ur_tree = treeFromFile(self.environment['tree_path'])
        # Retrieve a chain structure between the base and the start of the end effector.
        self.scara_chain = self.ur_tree.getChain(
            self.environment['link_names'][0],
            self.environment['link_names'][-1])
        # print("nr of jnts: ", self.scara_chain.getNrOfJoints())
        # Initialize a KDL Jacobian solver from the chain.
        self.jac_solver = ChainJntToJacSolver(self.scara_chain)
        #print(self.jac_solver)
        self._observations_stale = [False for _ in range(1)]
        #print("after observations stale")
        self._currently_resetting = [False for _ in range(1)]
        self.reset_joint_angles = [None for _ in range(1)]

        # TODO review with Risto, we might need the first observation for calling step()
        # observation = self.take_observation()
        # assert not done
        # self.obs_dim = observation.size
        """
        obs_dim is defined as:
        num_dof + end_effector_points=3 + end_effector_velocities=3
        end_effector_points and end_effector_velocities is constant and equals 3
        recently also added quaternion to the obs, which has dimension=4
        """
        #
        self.obs_dim = self.scara_chain.getNrOfJoints(
        ) + 9  #7 #6 hardcode it for now
        # # print(observation, _reward)

        # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this.
        # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.]
        # #bounds = self.model.actuator_ctrlrange.copy()
        low = -np.pi * np.ones(self.scara_chain.getNrOfJoints())
        high = np.pi * np.ones(self.scara_chain.getNrOfJoints())
        # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # high = np.inf * np.ones(self.scara_chain.getNrOfJoints())
        # print("Action spaces: ", low, high)
        self.action_space = spaces.Box(low, high, dtype=np.float32)
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.add_model_urdf = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                                 SpawnModel)
        self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model',
                                                SpawnModel)
        self.remove_model = rospy.ServiceProxy('/gazebo/delete_model',
                                               DeleteModel)
        self.set_model_pose = rospy.ServiceProxy('/gazebo/set_model_state',
                                                 SetModelState)
        self.get_model_state = rospy.ServiceProxy('/gazebo/get_model_state',
                                                  GetModelState)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)

        robot_namespace = ""
        pose = Pose()
        pose.position.x = EE_POS_TGT[0, 0]
        pose.position.y = EE_POS_TGT[0, 1]
        pose.position.z = EE_POS_TGT[0, 2]
        pose.orientation.x = 0
        pose.orientation.y = 0
        pose.orientation.z = 0
        pose.orientation.w = 0
        reference_frame = ""

        self.envs_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))

        file_xml = open(self.envs_path + '/assets/urdf/target/point.urdf',
                        mode='r')
        model_xml = file_xml.read()
        file_xml.close()

        rospy.wait_for_service('/gazebo/spawn_urdf_model')
        try:
            self.add_model_urdf(model_name="target",
                                model_xml=model_xml,
                                robot_namespace="",
                                initial_pose=pose,
                                reference_frame="world")
        except rospy.ServiceException as e:
            print('Error adding urdf model')

        # # self.obj_path = self.envs_path + '/assets/urdf/objs/rubik_cube/rubik_cube_random.sdf'
        # # self.obj_path = self.envs_path + '/assets/urdf/objs/rubik_cube/rubik_cube.sdf'
        # self.obj_path = self.envs_path + '/assets/urdf/objs/box.sdf' #0.067 0.067 0.067
        # # self.obj_path = self.envs_path + '/assets/urdf/objs/cylinder.sdf' # radius = 0.03 length = 0.08
        # # self.obj_path = self.envs_path + '/assets/urdf/objs/sphere.urdf' #radius = 0.033
        # file_sdf = open(self.obj_path ,mode='r')
        # model_sdf = file_sdf.read()
        # file_sdf.close()
        #
        # rospy.wait_for_service('/gazebo/spawn_sdf_model')
        # # rospy.wait_for_service('/gazebo/spawn_urdf_model')
        # try:
        #     # self.add_model_urdf(model_name="obj",
        #     self.add_model_sdf(model_name="obj",
        #                         model_xml=model_sdf,
        #                         robot_namespace="",
        #                         initial_pose=pose,
        #                         reference_frame="world")
        # except rospy.ServiceException as e:
        #     print('Error adding sdf model')

        self._seed()

    def collision_callback(self, message):
        """
        Callback method for the subscriber of Collision data
        """
        self._collision_msg = None
        # if message.collision1_name is not message.collision2_name:
        #     if "objs" not in message.collision1_name and "obj" not in message.collision2_name:
        #         if "obj" not in message.collision1_name or "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
        #             if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and  "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name:
        #                 self._collision_msg =  message

        if message.collision1_name is not message.collision2_name:
            if "obj" not in message.collision1_name and "obj" not in message.collision2_name:
                if "obj" not in message.collision1_name and "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
                    if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name:
                        if "obstacle" not in message.collision1_name and "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name:
                            if "obstacle" not in message.collision1_name and "obj" not in "message.collision2_name":
                                self._collision_msg = message

    def observation_callback(self, message):
        """
        Callback method for the subscriber of JointTrajectoryControllerState
        """
        self._observation_msg = message

    def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True):
        self.slowness = slowness
        self.slowness_unit = slowness_unit
        self.reset_jnts = reset_jnts
        print("slowness: ", self.slowness)
        print("slowness_unit: ", self.slowness_unit, "type of variable: ",
              type(slowness_unit))
        print("reset joints: ", self.reset_jnts, "type of variable: ",
              type(self.reset_jnts))

    def getModelFile(self, path):
        if path.endswith('.sdf'):
            return "sdf"
        elif path.endswith('.urdf'):
            return "urdf"
        else:
            raise TypeError('the file must be .sdf or .urdf')

    def randomizeSize(self, current_obj_name, shape):
        f = open(self.obj_path, 'r+')
        model_xml = f.read()
        f.seek(0)

        min_size = 0.03
        max_size = 0.1

        if shape != 'box' and shape != 'cylinder' and shape != 'sphere':
            raise TypeError("Shape must be 'box', 'cylinder' or 'sphere'")

        if shape == "box":
            size_x = str(round(np.random.uniform(min_size, max_size), 3))
            size_y = str(round(np.random.uniform(min_size, max_size), 3))
            size_z = str(round(np.random.uniform(min_size, max_size), 3))
            # height position of the wooden part of the table + height of the obj
            z = 0.69525 + float(size_z) / 2
        else:
            radius = str(round(np.random.uniform(min_size, max_size / 2), 3))
            z = 0.69525 + float(radius)
            if shape == "cylinder":
                length = str(round(np.random.uniform(min_size, max_size), 3))
                z = 0.69525 + float(length) / 2

        model_file = self.getModelFile(self.obj_path)
        if model_file == "sdf":
            if shape == "box":
                to_replace = model_xml[model_xml.find('<size>') +
                                       len('<size>'):model_xml.find('</size>')]
                model_xml = model_xml.replace(
                    to_replace, size_x + ' ' + size_y + ' ' + size_z)
            else:
                to_replace = model_xml[model_xml.find('<radius>') +
                                       len('<radius>'):model_xml.
                                       find('</radius>')]
                model_xml = model_xml.replace(to_replace, radius)
                if shape == "cylinder":
                    to_replace = model_xml[model_xml.find('<length>') +
                                           len('<length>'):model_xml.
                                           find('</length>')]
                    model_xml = model_xml.replace(to_replace, length)

        else:
            if shape == "box":
                to_replace = model_xml[model_xml.find('<box ') +
                                       len('<box '):model_xml.find('/>')]
                model_xml = model_xml.replace(
                    to_replace,
                    'size=' + '"' + size_x + ' ' + size_y + ' ' + size_z + '"')
            else:
                to_replace = model_xml[model_xml.find('<sphere ') +
                                       len('<sphere '):model_xml.find('/>')]
                model_xml = model_xml.replace(to_replace,
                                              'radius=' + '"' + radius + '"')
                if shape == "cylinder":
                    to_replace = model_xml[model_xml.find('<cylinder ') +
                                           len('<cylinder '):model_xml.
                                           find('/>')]
                    model_xml = model_xml.replace(
                        to_replace, 'length=' + '"' + length + '" ' +
                        'radius=' + '"' + radius + '"')

        f.truncate()
        f.write(model_xml)
        f.close()
        self.randomizeObjectType(current_obj_name=current_obj_name,
                                 replace=self.obj_path)
        self.randomizeTargetPose(obj_name=current_obj_name, centerPoint=z)

    def random_texture(self):
        material_path = self.envs_path + "/assets/urdf/Media/materials/scripts/textures.material"
        m = open(material_path, 'r')
        textures = []

        for t in m:
            if t.startswith("material "):
                textures.append(t[9:-1])

        rand_texture = np.random.choice(textures)
        return rand_texture, textures

    def randomizeTexture(self, current_obj_name):
        f = open(self.obj_path, 'r+')
        model_xml = f.read()
        f.seek(0)

        new_texture, list_textures = self.random_texture()
        for lt in list_textures:
            if lt != str(new_texture):
                model_xml = model_xml.replace(lt, new_texture)

        f.truncate()
        f.write(model_xml)
        f.close()
        self.randomizeObjectType(current_obj_name=current_obj_name,
                                 replace=self.obj_path)

    def randomizeObjectType(self,
                            current_obj_name=None,
                            list_obj=None,
                            replace=None):
        obj = ModelState()

        rospy.wait_for_service('gazebo/get_model_state')
        try:
            obj = self.get_model_state("target", '')
        except rospy.ServiceException as e:
            print("Error getting the model state")

        rospy.wait_for_service('/gazebo/delete_model')
        try:
            self.remove_model(current_obj_name)
        except rospy.ServiceException as e:
            print("Error removing model")

        if replace is None:
            random_obj = np.random.choice(list_obj)
            self.obj_path = random_obj
        else:
            random_obj = replace

        obj_file = open(random_obj, mode='r')
        model_xml = obj_file.read()
        obj_file.close()

        model_file = self.getModelFile(random_obj)
        if model_file == "sdf":
            rospy.wait_for_service('/gazebo/spawn_sdf_model')
            try:
                self.add_model_sdf(model_name=current_obj_name,
                                   model_xml=model_xml,
                                   robot_namespace="",
                                   initial_pose=obj.pose,
                                   reference_frame="world")
            except rospy.ServiceException as e:
                print("Error adding sdf")
        else:
            rospy.wait_for_service('/gazebo/spawn_urdf_model')
            try:
                self.add_model_urdf(model_name=current_obj_name,
                                    model_xml=model_xml,
                                    robot_namespace="",
                                    initial_pose=obj.pose,
                                    reference_frame="world")
            except rospy.ServiceException as e:
                print("Error adding urdf")

    def randomizeStartPose(self, lower, upper):
        self.environment['reset_conditions']['initial_positions'] = [
            np.random.uniform(lower, upper),
            np.random.uniform(lower, upper),
            np.random.uniform(lower, upper),
            np.random.uniform(lower, upper),
            np.random.uniform(lower, upper),
            np.random.uniform(lower, upper)
        ]
        self._pub.publish(
            self.get_trajectory_message(
                self.environment['reset_conditions']['initial_positions']))

    def randomizeTargetPose(self, obj_name, centerPoint=None):
        ms = ModelState()
        if centerPoint is None:
            EE_POS_TGT = np.asmatrix([
                round(np.random.uniform(-0.62713, -0.29082), 5),
                round(np.random.uniform(-0.15654, 0.15925), 5),
                self.realgoal[2]
            ])

            roll = 0.0
            pitch = 0.0
            yaw = np.random.uniform(-1.57, 1.57)
            q = quat.from_euler_angles(roll, pitch, yaw)
            EE_ROT_TGT = rot_matrix = quat.as_rotation_matrix(q)
            self.target_orientation = EE_ROT_TGT
            ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'],
                              EE_POS_TGT, EE_ROT_TGT).T)

            ms.pose.position.x = EE_POS_TGT[0, 0]
            ms.pose.position.y = EE_POS_TGT[0, 1]
            ms.pose.position.z = EE_POS_TGT[0, 2]
            ms.pose.orientation.x = q.x
            ms.pose.orientation.y = q.y
            ms.pose.orientation.z = q.z
            ms.pose.orientation.w = q.w

            ms.model_name = obj_name
            rospy.wait_for_service('gazebo/set_model_state')
            try:
                self.set_model_pose(ms)
            except (rospy.ServiceException) as e:
                print("Error setting the pose of " + obj_name)

        else:
            EE_POS_TGT = np.asmatrix(
                [self.realgoal[0], self.realgoal[1], centerPoint])
            ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'],
                              EE_POS_TGT, self.target_orientation).T)

            ms.pose.position.x = EE_POS_TGT[0, 0]
            ms.pose.position.y = EE_POS_TGT[0, 1]
            ms.pose.position.z = EE_POS_TGT[0, 2]
            ms.pose.orientation.x = 0
            ms.pose.orientation.y = 0
            ms.pose.orientation.z = 0
            ms.pose.orientation.w = 0

        self._pub_link_state.publish(
            LinkState(link_name="target_link",
                      pose=ms.pose,
                      reference_frame="world"))
        self.realgoal = ee_tgt

    def get_trajectory_message(self, action, robot_id=0):
        """
        Helper function.
        Wraps an action vector of joint angles into a JointTrajectory message.
        The velocities, accelerations, and effort do not control the arm motion
        """
        # Set up a trajectory message to publish.
        action_msg = JointTrajectory()
        action_msg.joint_names = self.environment['joint_order']
        # Create a point to tell the robot to move to.
        target = JointTrajectoryPoint()
        action_float = [float(i) for i in action]
        target.positions = action_float
        # These times determine the speed at which the robot moves:
        # it tries to reach the specified target position in 'slowness' time.
        if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
            target.time_from_start.secs = self.slowness
        elif (self.slowness_unit == 'nsec'):
            target.time_from_start.nsecs = self.slowness
        else:
            print("Unrecognized unit. Please use sec or nsec.")

        # Package the single point into a trajectory of points with length 1.
        action_msg.points = [target]
        return action_msg

    def process_observations(self, message, agent, robot_id=0):
        """
        Helper fuinction to convert a ROS message to joint angles and velocities.
        Check for and handle the case where a message is either malformed
        or contains joint values in an order different from that expected observation_callback
        in hyperparams['joint_order']
        """
        if not message:
            print("Message is empty")
        else:
            # # Check if joint values are in the expected order and size.
            if message.joint_names != agent['joint_order']:
                # Check that the message is of same size as the expected message.
                if len(message.joint_names) != len(agent['joint_order']):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER

                # Check that all the expected joint values are present in a message.
                if not all(
                        map(lambda x, y: x in y, message.joint_names, [
                            self._valid_joint_set[robot_id]
                            for _ in range(len(message.joint_names))
                        ])):
                    raise MSG_INVALID_JOINT_NAMES_DIFFER
                    print("Joints differ")
            return np.array(
                message.actual.positions)  # + message.actual.velocities

    def get_jacobians(self, state, robot_id=0):
        """
        Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
        Returns a repackaged Jacobian that is 3x6.
        """
        # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles
        jacobian = Jacobian(self.scara_chain.getNrOfJoints())
        # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles.
        angles = JntArray(self.scara_chain.getNrOfJoints())
        # Construct the joint array from the most recent joint angles.
        for i in range(self.scara_chain.getNrOfJoints()):
            angles[i] = state[i]
        # Update the jacobian by solving for the given angles.observation_callback
        self.jac_solver.JntToJac(angles, jacobian)
        # Initialize a numpy array to store the Jacobian.
        J = np.array([[jacobian[i, j] for j in range(jacobian.columns())]
                      for i in range(jacobian.rows())])
        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
        ee_jacobians = J
        return ee_jacobians

    def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot):
        """
        Get the jacobians of the points on a link given the jacobian for that link's origin
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point
                 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point
        """
        ee_points = np.asarray(ee_points)
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T,
                                                 axis=1)
        ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \
                                        np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose(
                                            (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints())
        ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1))
        return ee_points_jac_trans, ee_points_jac_rot

    def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot,
                                 joint_velocities):
        """
        Get the velocities of the points on a link
        :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin
        :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system
        :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system
        :param joint_velocities: 1 x 6 numpy array, joint velocities
        :return: 3N numpy array, velocities of each point
        """
        ref_jacobians_trans = ref_jacobian[:3, :]
        ref_jacobians_rot = ref_jacobian[3:, :]
        ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities)
        ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities)
        ee_velocities = ee_velocities_trans + np.cross(
            ee_velocities_rot.reshape(1, 3),
            ref_rot.dot(ee_points.T).T)
        return ee_velocities.reshape(-1)

    def take_observation(self):
        """
        Take observation from the environment and return it.
        TODO: define return type
        """
        obs_message = self._observation_msg
        if obs_message is None:
            return None

        # Collect the end effector points and velocities in
        # cartesian coordinates for the process_observationsstate.
        # Collect the present joint angles and velocities from ROS for the state.
        last_observations = self.process_observations(obs_message,
                                                      self.environment)
        # # # Get Jacobians from present joint angles and KDL trees
        # # # The Jacobians consist of a 6x6 matrix getting its from from
        # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
        ee_link_jacobians = self.get_jacobians(last_observations)
        if self.environment['link_names'][-1] is None:
            print("End link is empty!!")
            return None
        else:
            trans, rot = forward_kinematics(
                self.scara_chain,
                self.environment['link_names'],
                last_observations[:self.scara_chain.getNrOfJoints()],
                base_link=self.environment['link_names'][0],
                end_link=self.environment['link_names'][-1])

            rotation_matrix = np.eye(4)
            rotation_matrix[:3, :3] = rot
            rotation_matrix[:3, 3] = trans
            # angle, dir, _ = rotation_from_matrix(rotation_matrix)
            # current_quaternion = np.array([angle]+dir.tolist())#

            # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here
            # current_quaternion = quaternion_from_matrix(rotation_matrix)
            # tgt_quartenion = quaternion_from_matrix(self.target_orientation)

            current_quaternion = quat.from_rotation_matrix(rotation_matrix)
            tgt_quartenion = quat.from_rotation_matrix(self.target_orientation)

            # A  = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T
            #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0]

            quat_error = current_quaternion * tgt_quartenion.conjugate()
            rot_vec_err = quat.as_rotation_vector(quat_error)

            # convert quat to np arrays
            quat_error = quat.as_float_array(quat_error)

            # RK:  revisit this later, we only take one angle difference here!
            angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.))

            current_ee_tgt = np.ndarray.flatten(
                get_ee_points(self.environment['end_effector_points'], trans,
                              rot).T)
            ee_points = current_ee_tgt - self.realgoal  #self.environment['ee_points_tgt']
            ee_points_jac_trans, _ = self.get_ee_points_jacobians(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot)
            ee_velocities = self.get_ee_points_velocities(
                ee_link_jacobians, self.environment['end_effector_points'],
                rot, last_observations)

            # Concatenate the information that defines the robot state
            # vector, typically denoted asrobot_id 'x'.
            state = np.r_[np.reshape(last_observations, -1),
                          np.reshape(ee_points, -1),
                          np.reshape(rot_vec_err, -1),
                          np.reshape(ee_velocities, -1), ]
            return state

    def rmse_func(self, ee_points):
        """
        Computes the Residual Mean Square Error of the difference between current and desired end-effector position
        """
        rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32))
        return rmse

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - action
            - observation
            - reward
            - done (status)
        """
        self.iterator += 1

        # if not collided:
        #     # Execute "action"
        self._pub.publish(
            self.get_trajectory_message(
                action[:self.scara_chain.getNrOfJoints()]))

        # # Take an observation
        # TODO: program this better, check that ob is not None, etc.
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        self.reward_dist = -self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints():
                    (self.scara_chain.getNrOfJoints() + 3)])
        # careful we have degrees now so we scale with
        orientation_scale = 0.1
        self.reward_orient = -orientation_scale * self.rmse_func(
            self.ob[self.scara_chain.getNrOfJoints() +
                    3:(self.scara_chain.getNrOfJoints() + 6)])
        #scale here the orientation because it should not be the main bias of the reward, position should be
        collided = False

        if self._collision_msg is not None and self._collision_msg.collision1_name and self._collision_msg.collision2_name:
            # print("\ncollision detected: ", self._collision_msg)
            # print("Collision detected")
            collided = True
            self.reward = (self.reward_dist + self.reward_orient) * 6.0
            # print("Reward collision is: ", self.reward)

            # Resets the state of the environment and returns an initial observation.
            # we should avoid this --> huge bottleneck
            rospy.wait_for_service('/gazebo/reset_simulation')
            try:
                self.reset_proxy()
                # go to the previous state before colliding
                # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()]))
            except (rospy.ServiceException) as e:
                print("/gazebo/reset_simulation service call failed")
                # self.goToInit()
                # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_world', Empty)

        else:
            # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3
            # here is the distance block
            if abs(self.reward_dist) < 0.01:
                self.reward = 1 + self.reward_dist  # Make the reward increase as the distance decreases
                print("Reward dist is: ", self.reward)
                if abs(self.reward_orient) < 0.01:
                    self.reward = 1 + self.reward + self.reward_orient
                    print("Reward dist + orient is: ", self.reward)
                else:
                    self.reward = self.reward + self.reward_orient
                    print("Reward dist+(orient>0.01) is: ", self.reward)

            else:
                self.reward = self.reward_dist

        done = bool((abs(self.reward_dist) < 0.001)
                    or (self.iterator > self.max_episode_steps)
                    or (abs(self.reward_orient) < 0.001))

        # Return the corresponding observations, rewards, etc.
        # TODO, understand better what's the last object to return
        # return self.ob, self.reward, done, collided, {}
        return self.ob, self.reward, done, {}

    def goToInit(self):
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()
        # # Go to initial position and wait until it arrives there
        # Wait until the arm is within epsilon of reset configuration.
        self._time_lock.acquire(True, -1)
        with self._time_lock:
            self._currently_resetting = True
        self._time_lock.release()

        if self._currently_resetting:
            epsilon = 1e-3
            reset_action = self.environment['reset_conditions'][
                'initial_positions']
            now_action = self._observation_msg.actual.positions
            du = np.linalg.norm(reset_action - now_action, float(np.inf))
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))
            if du > epsilon:
                self._currently_resetting = True
                self._pub.publish(
                    self.get_trajectory_message(
                        self.environment['reset_conditions']
                        ['initial_positions']))
                time.sleep(3)

    def reset(self):
        """
        Reset the agent for a particular experiment condition.
        """
        # self._pub_rand_light.publish()
        # self._pub_rand_sky.publish()
        # self._pub_rand_physics.publish()
        # self._pub_rand_obstacles.publish()
        # self.randomizeTargetPose("obj")
        # self.randomizeTexture("obj")
        # self.randomizeSize("obj", "box")

        # common_path = self.envs_path + "/assets/urdf/objs/"
        # path_list = [common_path + "rubik_cube/rubik_cube_random.sdf", common_path + "rubik_cube/rubik_cube.sdf",
        #             common_path + "box.sdf", common_path + "cylinder.sdf", common_path + "sphere.urdf"]
        # for pl in path_list:
        #     if pl == self.obj_path:
        #         path_list.remove(pl)
        # self.randomizeObjectType("obj", path_list)

        self.iterator = 0

        if self.reset_jnts is True:
            self._pub.publish(
                self.get_trajectory_message(
                    self.environment['reset_conditions']['initial_positions']))

            # self.randomizeStartPose(-3.13, 3.13)
            # # Generate a new random start pose until it is not a colliding state
            # while self._collision_msg is not None:
            #     self.randomizeStartPose(-3.13, 3.13)

            if (self.slowness_unit == 'sec') or (self.slowness_unit is None):
                time.sleep(int(self.slowness))
            elif (self.slowness_unit == 'nsec'):
                time.sleep(int(self.slowness /
                               1000000000))  # using nanoseconds
            else:
                print("Unrecognized unit. Please use sec or nsec.")

        # Take an observation
        self.ob = self.take_observation()
        while (self.ob is None):
            self.ob = self.take_observation()

        # Return the corresponding observation
        return self.ob