def _get_jacobians(self, state): """Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(6) # Initialize a joint array for the present 6 joint angles. angles = JntArray(6) # Construct the joint array from the most recent joint angles. for i in range(6): angles[i] = state[i] # Initialize a tree structure from the robot urdf. # Note that the xacro of the urdf is updated by hand. # Then the urdf must be compiled. _, ur_tree = treeFromFile(self._hyperparams['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. ur_chain = ur_tree.getChain(self._hyperparams['link_names'][0], self._hyperparams['link_names'][6]) # Initialize a KDL Jacobian solver from the chain. jac_solver = ChainJntToJacSolver(ur_chain) # Update the jacobian by solving for the given angles. jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.zeros((6, 6)) for i in range(jacobian.rows()): for j in range(jacobian.columns()): J[i, j] = jacobian[i, j] # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J[0:3, :] return ee_jacobians
class GazeboMARATopOrientCollisionv0Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._collision_msg = None self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466]) # alex2 EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar # EE_ROT_TGT = np.asmatrix([[-0.00128296, 0.9999805 , 0.00611158], # [ 0.00231397, -0.0061086 , 0.99997867], # [ 0.9999965 , 0.00129708, -0.00230609]]) # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # original orientation EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 7 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) # high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.reset_world = rospy.ServiceProxy('/gazebo/reset_world', Empty) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") # Seed the environment # Seed the environment self._seed() # def collision_callback(self, message): # """ # Callback method for the subscriber of Collision data # """ # # # if "puzzle_ball_joints::cubie" not in message.collision1_name and "puzzle_ball_joints::cubie" not in message.collision2_name: # if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name: # if "puzzle_ball_joints::cubie" not in message.collision1_name or "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: # if message.collision1_name and message.collision2_name: # self._collision_msg = message # # print("self._collision_msg: ", self._collision_msg) def collision_callback(self, message): """ Callback method for the subscriber of Collision data """ self._collision_msg = None if message.collision1_name is not message.collision2_name: if "puzzle_ball_joints::cubie" not in message.collision1_name and "puzzle_ball_joints::cubie" not in message.collision2_name: if "puzzle_ball_joints::cubie" not in message.collision1_name or "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name: self._collision_msg = message def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([-0.189383, -0.123176, 0.894476]) # point 1 EE_POS_TGT_2 = np.asmatrix([-0.359236, 0.0297278, 0.760402]) # point 2 EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeMultipleTargets(self): print("calling randomize multiple target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.2868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.2868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # tgt_quartenion = quaternion_from_matrix(self.target_orientation) current_quaternion = quat.from_rotation_matrix(rotation_matrix) # print("current_quaternion: ", current_quaternion) tgt_quartenion = quat.from_rotation_matrix(self.target_orientation) # A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] # this is wrong!!!! Substraction should be replaced by: quat_error = current_quaternion * tgt_quartenion.conjugate() # quat_error = current_quaternion - tgt_quartenion quat_error = current_quaternion * tgt_quartenion.conjugate() # convert quat to np arrays quat_error = quat.as_float_array(quat_error) # RK: revisit this later, we only take one angle difference here! angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.)) # print("quat error: ", quat_error) # print("quat_error[..., 0]: ", quat_error[..., 0]) # print("quat_error: ",quat_error) # print("angle_diff: ", angle_diff) # print("self.realgoal: ", self.realgoal) # print("curr quat: ", current_quaternion) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(angle_diff, -1), np.reshape(ee_velocities, -1), ] # print("quat_error: ", quat_error) # print("ee_points:", ee_points) # print("angle_diff: ", angle_diff) return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(angle_diff, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action, prevac): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 # rmse_trans = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # rmse_orient = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # # print("rmse_orient: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # self.reward_dist = -rmse_trans # self.reward_orient = -rmse_orient # # + self.ob[(self.scara_chain.getNrOfJoints()+4)] ) # # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 # if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): # self.reward_final_dist = 1 + self.reward_dist # Make the reward increase as the distance decreases # print("Reward Pose is: ", self.reward_final_dist) # else: # self.reward_final_dist = self.reward_dist self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # careful we have degrees now so we scale with orientation_scale = 0.01 # print("orientation reward: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+4)]) self.reward_orient = -orientation_scale * abs( self.ob[self.scara_chain.getNrOfJoints() + 3:(self.scara_chain.getNrOfJoints() + 4)] ) #self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+4)])*0.1 # print("self.reward_orient: ", self.reward_orient) # print(self.reward_orient) # print("self.reward_orient: ", self.reward_orient) #scale here the orientation because it should not be the main bias of the reward, position should be collided = False if self._collision_msg is not None and self._collision_msg.collision1_name and self._collision_msg.collision2_name: # print("\ncollision detected: ", self._collision_msg) print("Collision detected") collided = True self.reward = (self.reward_dist + self.reward_orient) * 4.0 # print("Reward collision is: ", self.reward) # Resets the state of the environment and returns an initial observation. # we should avoid this --> huge bottleneck rospy.wait_for_service('/gazebo/reset_simulation') try: self.reset_proxy() # go to the previous state before colliding self._pub.publish( self.get_trajectory_message( prevac[:self.scara_chain.getNrOfJoints()])) except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") # self.goToInit() # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_world', Empty) else: # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 # here is the distance block if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.01): self.reward = 1 + self.reward_dist # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("Reward is (minus): ", self.reward) # take into account the orientation if (abs(self.reward_orient) < 0.005): self.reward = self.reward + (1 + self.reward_orient) print("Reward orientation is: ", self.reward) else: self.reward = self.reward + self.reward_orient # * self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # print("Reward orientation is (minus): ", self.reward) if self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.01 and abs( self.reward_orient) < 0.005: self.reward = 10 * (2 + self.reward_dist + self.reward_orient) print("Reward hit the target, and is: ", self.reward) # self.reward =self.reward - abs(self.ob[(self.scara_chain.getNrOfJoints()+4)]) # Calculate if the env has been solved done = bool(((abs(self.reward_dist) < 0.01) and (abs(self.reward_orient)) < 0.005) or (self.iterator > self.max_episode_steps)) if not collided: # Execute "action" self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, collided, {} def goToInit(self): self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions'][ 'initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action - now_action, float(np.inf)) self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) time.sleep(3) def _reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboModularScara4DOFv3Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara4_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 100 # this is specific parameter for the acktr algorithm. Not used in ppo1, trpo... self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([2.0, -2.0, -2.0, 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. slowness = 10000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' # Set constants for links BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' SCARA_MOTOR4 = 'scara_e1_motor4' SCARA_INSIDE_MOTOR4 = 'scara_e1_motor4_inside' SCARA_SUPPORT_MOTOR4 = 'scara_e1_motor4_support' SCARA_BAR_MOTOR4 = 'scara_e1_bar4' SCARA_FIXBAR_MOTOR4 = 'scara_e1_fixbar4' SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, SCARA_BAR_MOTOR3, SCARA_FIXBAR_MOTOR3, SCARA_MOTOR4, SCARA_INSIDE_MOTOR4, SCARA_SUPPORT_MOTOR4, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_4joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # # Gazebo specific services to start/stop its behavior and # # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed() def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. # target.time_from_start.secs = self.environment['slowness'] target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present 6 joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # # # Calculate if the env has been solved # done = bool(abs(self.reward_dist) < 0.005) self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.005): self.reward = 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("reward: ", self.reward) # print("rmse_func: ", self.rmse_func(ee_points)) # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Execute "action" self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) #TODO: wait until action gets executed # When adding this the algorithm does not converge # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def goToInit(self): self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions'][ 'initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action - now_action, float(np.inf)) self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) time.sleep(3) def _reset(self): # """ # Reset the agent for a particular experiment condition. # """ # # Resets the state of the environment and returns an initial observation. # rospy.wait_for_service('/gazebo/reset_simulation') # try: # #reset_proxy.call() # self.reset_proxy() # except (rospy.ServiceException) as e: # print ("/gazebo/reset_simulation service call failed") # # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause the simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # self.goToInit() # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboModularScara3DOFStaticObstaclev1Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name #gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_v0.launch") gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_urdf_simplified_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._collision_msg = None ### self._torque_msg = None ### self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.mod = 100 self.init_torque = 0.0 self.init_torque_array = np.zeros(3) self.ee_point_matrix = [] ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342]) # 26342 center of S # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' MOTOR3_TORQUE_SUBSCRIBER = '/motor3_torque' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base' # BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' # SCARA_BAR_MOTOR1 = 'scara_e1_bar1' # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' # SCARA_BAR_MOTOR2 = 'scara_e1_bar2' # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' # SCARA_BAR_MOTOR3 = 'scara_e1_bar3' # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [SCARA_MOTOR1,SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if(prefix_path==None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf" #URDF_PATH = prefix_path + "/urdf/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # self._sub_coll = rospy.Subscriber('/gazebo_contacts',ContactState, self.collision_callback) ## self._sub_torque = rospy.Subscriber('/motor3_torque',WrenchStamped, self.torque_callback) ## # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. # print("self.environment['link_names'][0]:", self.environment['link_names'][0]) # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1]) self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] print("nr of joints: ", self.scara_chain.getNrOfJoints()) # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0,0]; pose.position.y = EE_POS_TGT[0,1]; pose.position.z = 0.055#EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0; pose.orientation.y= 0; pose.orientation.z = 0; pose.orientation.w = 0; reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment self._seed() # self.max_steps_episode = 1000 def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=False): self.slowness = slowness self.slowness_unit = slowness_unit print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) def randomizeObstacle(self): print("calling randomize obstacle") #EE_POS_TGT_1 = np.asmatrix([0.3239543, 0.0083323, 0.3746]) #center of R #EE_POS_TGT_1 = np.asmatrix([0.29557, -0.0422738, 0.3746]) # R top left EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S 0.26342 #EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S center #EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.26342]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ #self.realgoal = target1 if np.random.uniform() < 0.5 else target2 if np.random.uniform() < 0.5: self.addObstacle() #self.realgoal = target2 self.realgoal = target1 print("\n\nOBSTACLE\n\n") else: # self.removeObstacle() self.addObstacle() self.realgoal = target1 print("\n\nNO OBSTACLE\n\n") print("randomizeObstacle realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # RK: do we need this function? def setPenalizationMod(self, pen_mod): if pen_mod == 1: self.mod = 100 elif pen_mod == 2: self.mod = 200 elif pen_mod == 3: self.mod = 300 elif pen_mod == 4: self.mod = 400 elif pen_mod == 5: self.mod = 500 elif pen_mod == 6: self.mod = 600 elif pen_mod == 7: self.mod = 700 elif pen_mod == 8: self.mod = 800 #def randomizeCorrect(self): def randomizeTarget(self): print("calling randomize correct") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H def collision_callback(self, message): """ Callback method for the subscriber of Collision data """ # print("\ncollision: ", self._collision_msg) self._collision_msg = message def torque_callback(self, message): """ Callback method for the subscriber of Collision data """ self._torque_msg = message # <<<<<<< HEAD # if self.init_torque == 0.0: # self.init_torque_array = [message.wrench.torque.x, message.wrench.torque.y, message.wrench.torque.z] # self.init_torque = np.linalg.norm(self.init_torque_array) # print("Init force at motor 3 is: ", self.init_torque) # # print("\nTorque: ", self._torque_msg) # ======= # #print("\nTorque: ", self._torque_msg) # >>>>>>> 5534657d7384ea47ae3ebbf2ddc1ff9b68d9a0be def normals_callback(self, message): """ Callback method for the subscriber of Collision data """ self._normals_msg = message def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action,set_const_vel=False, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float if set_const_vel: target.velocities = 0.0005*np.ones(self.scara_chain.getNrOfJoints()) target.effort = 0.005*np.ones(self.scara_chain.getNrOfJoints()) target.time_from_start.secs = 4 else: # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty"); # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all(map(lambda x,y: x in y, message.joint_names, [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array(message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) self.current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = self.current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] # #calculate more sophisticated distance measurement # self.ee_point_matrix.append(np.array(ee_points)) # # print("\nself.ee_point_matrix: ", self.ee_point_matrix) # if len(self.ee_point_matrix)>3: # # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None) # print("len(self.ee_point_matrix): ",len(self.ee_point_matrix), "self.ee_point_matrix: ", self.ee_point_matrix) # if len(self.ee_point_matrix)>10: # self.ee_point_matrix = [] # print("current_ee_tgt: ", current_ee_tgt) # print("ee_points:", ee_points) ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def log_torque_func(self, torque_diff): """ Computes the Log of the Eucledian Distance Error between current and desired end-effector position """ log_dist = np.log(torque_diff, dtype=np.float32) log_dist[log_dist == -np.inf] = 0.0 log_dist = np.nan_to_num(log_dist) return log_dist def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator+=1 self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) # elif(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.08): # self.reward = 0.5 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases # print("Reward is: ", self.reward) else: self.reward = self.reward_dist # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None) # print("d_hamming: ", d) # Calculate if the env has been solved # <<<<<<< HEAD # done = (bool(abs(self.reward_dist) < 0.008)) or (self.iterator > self.max_episode_steps) # # # # # #calculate the linnorm of the force at the motor3 # #1. put current torques in an array # curr_torque_array = [self._torque_msg.wrench.torque.x, self._torque_msg.wrench.torque.y, self._torque_msg.wrench.torque.z] # torque_motor3 = np.linalg.norm(curr_torque_array) # # diff_torques = np.subtract(curr_torque_array, self.init_torque_array) # # log_torques = self.log_torque_func(diff_torques) # # print("log_torques: ", log_torques) # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # # if self.init_torque > 0 and torque_motor3 > self.init_torque: # # print("collision detected with torque at motor3: ", torque_motor3, "init_torque: ", self.init_torque) # proportion_torque = np.divide(torque_motor3,self.init_torque) # # print("diff between init force and collision: ", proportion_force) # if proportion_torque > 2.0: #and proportion_force <= 100.0: # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()], set_const_vel=True)) # self.reward = self.reward - np.divide((proportion_torque + 0.5*np.linalg.norm(log_torques)),1000) # # print("c1 self.reward:", self.reward) # if(proportion_torque <5 and (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.08)): # self.reward = 0.5 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) - np.divide((proportion_torque + 0.5*np.linalg.norm(log_torques)),1000) # print("c2 self.reward:", self.reward) # # # # print("at end reward: ", self.reward) # # # print("force_motor3: ", force_motor3) # # # #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf # # if abs(self._torque_msg.wrench.force.x) > 100: # # print("self.self._torque_msg.force.x>100: ", self._torque_msg.wrench.force.x) # # # # contact_depths = 100 * self._collision_msg.depths[0] #make them in mm otherwise we have too many decimals # # print("\self.self._torque_msg", self.self._torque_msg, "reward: ", self.reward) # # # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # # #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time). # # # changing to abs value of depths # # if contact_depths > 0 and abs(contact_depths) < 0.0001: # # self.reward = self.reward - (abs(contact_depths))/2 # # print("\n cond1, contact_depths", contact_depths, "reward: ", self.reward) # # elif abs(contact_depths) > 0.0001: #contact_depths > 0 and # # self.reward = self.reward - abs(contact_depths) # # print("\n cond2, contact_depths", contact_depths, "reward: ", self.reward) # # else: # # # self.reward = -100 # # print("cond3, self._collision_msg.depths[0]:", contact_depths) # ======= done = (bool(abs(self.reward_dist) < 0.005)) or (self.iterator > self.max_episode_steps) # COLLISION DEPTH BASED REWARD SHAPING # #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf # if self._collision_msg.collision2_name or self._collision_msg.collision1_name: #else # contact_depths = 100 * self._collision_msg.depths[0] #make them in mm otherwise we have too many decimals # print("\ncontact_depths", contact_depths, "reward: ", self.reward) # # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time). # # changing to abs value of depths # if contact_depths > 0 and abs(contact_depths) < 0.0001: # self.reward = self.reward - (abs(contact_depths))/2 # print("\n cond1, contact_depths", contact_depths, "reward: ", self.reward) # elif abs(contact_depths) > 0.0001: #contact_depths > 0 and # self.reward = self.reward - abs(contact_depths) # print("\n cond2, contact_depths", contact_depths, "reward: ", self.reward) # else: # # self.reward = -100 # print("cond3, self._collision_msg.depths[0]:", contact_depths) # TORQUE BASED REWARD SHAPING if self._torque_msg is not None: torque_x = self._torque_msg.wrench.torque.x torque_y = self._torque_msg.wrench.torque.y torque_z = self._torque_msg.wrench.torque.z torque_value = np.sqrt(torque_x * torque_x + torque_y * torque_y + torque_z * torque_z) / 1000 print("\n Torque value:", torque_value) if torque_value > 0.01 and torque_value < 0.1: self.reward = self.reward - (abs(torque_value)) / 2 print("\n Reward, torque penalization", self.reward) elif torque_value > 0.01 and torque_value > 0.1: self.reward = self.reward - (abs(torque_value)) print("\n Reward, torque penalization", self.reward) # >>>>>>> 5534657d7384ea47ae3ebbf2ddc1ff9b68d9a0be # # Take an observation self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. ee_point = self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)] + self.realgoal ee_point_eucledian = np.linalg.norm(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) #return self.ob, self.reward, done, ee_point,{} # #calculate more sophisticated distance measurement # self.ee_point_matrix.append(np.array(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])) # # print("\nself.ee_point_matrix: ", self.ee_point_matrix) # if len(self.ee_point_matrix)>3: # # d = pdist(self.ee_point_matrix,'mahalanobis', VI=None) # print("len(self.ee_point_matrix): ",len(self.ee_point_matrix), "self.ee_point_matrix: ", self.ee_point_matrix) # if len(self.ee_point_matrix)>10: # self.ee_point_matrix = [] return self.ob, self.reward, done, {} def addObstacle(self): rospy.wait_for_service('/gazebo/spawn_urdf_model') try: # #NORMAL BOX model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <box size=\".1 .1 1\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"blue\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"10.\"/>\ <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\ </inertial>\ <collision>\ <geometry>\ <box size=\".1 .1 1\"/>\ </geometry>\ <surface>\ <contact>\ <ode>\ <min_depth>\"0.001\"</min_depth>\ <max_depth>\"0.01\"</max_depth>\ <kp>\"1e6\"</kp>\ <kd>\"1.000000\"</kd>\ <max_velocity>\"1.000000\"</max_velocity>\ </ode>\ </contact>\ </surface>\ </collision>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/Blue</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() number = random.randint(0, 4) #pose.position.x = 0.15;# pose.position.x = 0.3;# pose.position.y = 0.07;# pose.position.z = 0.05; pose.orientation.x = 0; pose.orientation.y= 0; pose.orientation.z = 0; pose.orientation.w = 0; #BOX # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"box0\">\ # <visual>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"blue\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia>\ # <ixx>1.0</ixx>\ # <ixy>0</ixy>\ # <ixz>0</ixz>\ # <iyy>1.0</iyy>\ # <iyz>0</iyz>\ # <izz>1.0</izz>\ # </inertia>\ # </inertial>\ # <collision>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # </collision>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"box0\"/>\ # </joint>\ # <gazebo reference=\"box0\">\ # <material>Gazebo/Blue</material>\ # </gazebo>\ # </robot>" # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"cylinder0\">\ # <visual>\ # <geometry>\ # <sphere radius=\"0.01\"/>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"rojotransparente\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ # </inertial>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"cylinder0\"/>\ # </joint>\ # <gazebo reference=\"cylinder0\">\ # <material>Gazebo/GreenTransparent</material>\ # </gazebo>\ # </robot>" # robot_namespace = "" # pose = Pose() # number = random.randint(0, 4) # # if number == 1: # # pose.position.x = 0.25; # # pose.position.y = 0.07; # # if number == 2: # # pose.position.x = 0.25; # # pose.position.y = -0.07; # # if number == 3: # # pose.position.x = 0.23; # # pose.position.y = -0.087; # # if number == 4: # # pose.position.x = 0.23; # # pose.position.y = 0.087; # # pose.position.x = 0.25;# # pose.position.y = 0.07;# # # pose.position.z = 0.0; # pose.orientation.x = 0; # pose.orientation.y= 0; # pose.orientation.z = 0; # pose.orientation.w = 0; reference_frame = "" if number > -1: self.add_model(model_name="obstacle", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame="") except (rospy.ServiceException) as e: print ("/gazebo/spawn_urdf_model service call failed") def removeObstacle(self): rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(model_name="obstacle") except (rospy.ServiceException) as e: print ("/gazebo/spawn_urdf_model service call failed") def _reset(self): # """ # Reset the agent for a particular experiment condition. # """ self.iterator = 0 msg = self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']) msg.points[0].time_from_start.secs = 0 msg.points[0].time_from_start.nsecs = 10000 self._pub.publish(msg) # time.sleep(int(self.environment['slowness'])) # using seconds # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif(self.slowness_unit == 'nsec'): time.sleep(int(self.slowness/1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboMARATopCollisionv0Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms, this should reflect the lstm step size, otherwqise it restarts two times self.rand_target_thresh = 40 self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._collision_msg = None self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466 + 0.1]) # alex2 # EE_POS_TGT = np.asmatrix([-0.386752, -0.000756, 1.40557]) # easy point # EE_POS_TGT = np.asmatrix([-0.173762, -0.0124312, 1.60415]) # for testing collision_callback # EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar # EE_ROT_TGT = np.asmatrix([[-0.00128296, 0.9999805 , 0.00611158], # [ 0.00231397, -0.0061086 , 0.99997867], # [ 0.9999965 , 0.00129708, -0.00230609]]) # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # original orientation EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' LINK_STATE_PUBLISHER = '/gazebo/set_link_state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory, queue_size=1) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) self._pub_link_state = rospy.Publisher(LINK_STATE_PUBLISHER, LinkState, queue_size=1) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 9 #7 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = -low self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.add_model_urdf = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "world" self.assets_path = os.path.abspath( os.path.join(rospkg.RosPack().get_path("gazebo_domain_randomizer"), os.pardir)) + "/assets" file_xml = open(self.assets_path + '/models/urdf/target_point.urdf', mode='r') model_xml = file_xml.read() file_xml.close() rospy.wait_for_service('/gazebo/spawn_urdf_model') try: self.add_model_urdf(model_name="target", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame=reference_frame) except rospy.ServiceException as e: print('Error adding urdf model') self._seed() def collision_callback(self, message): """ Callback method for the subscriber of Collision data """ if message.collision1_name != message.collision2_name: # neither obj nor obstacle colliding with table if "obj::" not in message.collision1_name and "obstacle" not in message.collision1_name or "table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: # neither obj colliding with obstacle and vice-versa nor objs colliding each other nor obstacles colliding each other if "obj::" not in ( message.collision1_name and message.collision2_name ) and "obstacle" not in (message.collision1_name and message.collision2_name): self._collision_msg = message def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ obs_message = self._observation_msg if obs_message is None: return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # current_quaternion = np.array([angle]+dir.tolist())# # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # tgt_quartenion = quaternion_from_matrix(self.target_orientation) current_quaternion = tf.quaternions.mat2quat(rot) #[w, x, y ,z] tgt_quartenion = tf.quaternions.mat2quat(self.target_orientation) # A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] quat_error = current_quaternion * tgt_quartenion.conjugate() rot_vec_err, _ = tf.quaternions.quat2axangle(quat_error) # convert quat to np arrays # quat_error = np.asarray(quat_error, dtype=np.quaternion).view((np.double, 4)) # RK: revisit this later, we only take one angle difference here! # angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.)) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(rot_vec_err, -1), np.reshape(ee_velocities, -1), ] return state def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - action - observation - reward - done (status) """ self.iterator += 1 self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) if self._collision_msg is not None: if self._collision_msg.collision1_name is None: raise AttributeError("collision1_name is None") if self._collision_msg.collision2_name is None: raise AttributeError("collision2_name is None") self.reward = self.reward_dist * 2.0 # Resets the state of the environment and returns an initial observation. # we should avoid this --> huge bottleneck rospy.wait_for_service('/gazebo/reset_simulation') try: self.reset_proxy() pose = Pose() pose.position.x = self.realgoal[0] pose.position.y = self.realgoal[1] pose.position.z = self.realgoal[2] pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 self._pub_link_state.publish( LinkState(link_name="target_link", pose=pose, reference_frame="world")) except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") else: # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 # here is the distance block if abs(self.reward_dist) < 0.005: self.reward = 1 + self.reward_dist # Make the reward increase as the distance decreases print("Reward dist is: ", self.reward) else: self.reward = self.reward_dist done = bool((abs(self.reward_dist) < 0.005) or (self.iterator > self.max_episode_steps)) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return self._collision_msg = None return self.ob, self.reward, done, {} def reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboMARATopOrientVisionv0Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop3DOF_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.detect_target_once = 1 self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) EE_POS_TGT = np.asmatrix([ -0.53170885, -0.02076771, 0.74240961 ]) # 200 cm from the z axis some random target at the begining # EE_POS_TGT = np.asmatrix([pose_rubik_pred.position.x, pose_rubik_pred.position.y, pose_rubik_pred.position.z]) # print("EE_POS_TGT: ", EE_POS_TGT) # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # EE_POS_TGT = np.asmatrix([0.0, 0.0, 1.4]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[-0.99500678, 0.09835458, -0.01696725], [-0.09951792, -0.99061751, 0.09366505], [-0.00759566, 0.0948859, 0.99545918]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1.57, 0., 1.57, 0.]) # INITIAL_JOINTS = np.array([0., 0., 1.37, 0., 1.37, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # INITIAL_JOINTS = np.array([2.832116288509212e-05, -7.644633833070458e-05, -0.9999138952294953, -2.4499067147409903e-05, -1.5700625461089226, 1.4725492722966749e-05]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" # URDF_PATH = "/home/rkojcev/catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) print("ee_tgt: ", ee_tgt) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) TARGET_SUBSCRIBER = '/mara/target' self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose, self.tgt_callback) # Instantiate CvBridge # self.bridge = CvBridge() # self._sub_image = rospy.Subscriber("/mara/rgb/image_raw", ImageMsg, self._observation_image_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 10 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) # high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.pub_set_model = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=1) self.addTarget() # Seed the environment # Seed the environment self._seed() def tgt_callback(self, msg): # print("Whats the target?: ", msg) # self.realgoal is None and self.target_orientation is None: if self.detect_target_once is 1: print("Get the target from vision, for now just use position.") EE_POS_TGT = np.asmatrix( [msg.position.x, msg.position.y, msg.position.z]) rot_quat = np.quaternion(msg.orientation.x, msg.orientation.y, msg.orientation.z, msg.orientation.w) print("rot_quat: ", rot_quat) rot_matrix = quat.as_rotation_matrix(rot_quat) print("rot_matrix: ", rot_matrix) EE_ROT_TGT = rot_matrix #np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) # rot_matrix # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) #rot_matrix# EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt = EE_POS_TGT # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 self.target_orientation = ee_rot_tgt print("Predicted target is: ", self.realgoal) self.detect_target_once = 0 self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"sphere0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"sphere0\"/>\ </joint>\ <gazebo reference=\"sphere0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") def addTarget(self): # The idea is to add random target in our case rubik cube and the vision system to detect and find the 3D pose of the cube. # Open a file: file # os.chdir('../assets/urdf/rubik_cube') # print("os: ", os) file = open( '/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/rubik_cube/rubik_cube.sdf', mode='r') # read all lines at once model_xml = file.read() # close the file file.close() rospy.wait_for_service('/gazebo/spawn_urdf_model') pose = Pose() pose.position.x = -0.5074649153217804 #-0.5074649153217804#random.uniform(-0.3, -0.6); pose.position.y = random.uniform( -0.02, 0.01) #0.03617460539210797#random.uniform(-0.02, 0.01) # stay put in Z!!! pose.position.z = 0.72 #0.72#0.80 #0.72; roll = 0.0 #random.uniform(-0.2, 0.6) pitch = 0.0 #random.uniform(-0.2, 0.2) yaw = 0.0 #-0.3#random.uniform(-0.3, 0.3) new_camera_pose = False q_rubik = quat.from_euler_angles(roll, pitch, yaw) # print("q_rubik: ", q_rubik.x, q_rubik.y, q_rubik.z, q_rubik.w) pose.orientation.x = q_rubik.x #0.0#q_rubik[0] pose.orientation.y = q_rubik.y #0.0#q_rubik[1] pose.orientation.z = q_rubik.z #0.0#q_rubik[2] pose.orientation.w = q_rubik.w #0.0#q_rubik[3] print("Real pose is: ", pose) try: self.add_model_sdf(model_name="puzzle_ball_joints", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") print("service call ok") except: print('error adding model') self.pub_set_model.publish( ModelState(model_name='puzzle_ball_joints', pose=pose, reference_frame="world")) # try: # self.removeTargetSphere() # except: # print('error removing sphere') def removeTarget(self): rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(model_name="puzzle_ball_joints") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def removeTargetSphere(self): rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(model_name="target") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def setTargetPositions(self, msg): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([-0.189383, -0.123176, 0.894476]) # point 1 EE_POS_TGT_2 = np.asmatrix([-0.359236, 0.0297278, 0.760402]) # point 2 EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeMultipleTargets(self): print("calling randomize multiple target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.2868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.2868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) tgt_quartenion = quaternion_from_matrix(self.target_orientation) A = np.vstack( [current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] quat_error = current_quaternion - tgt_quartenion # print("quat_error: ",quat_error) # print("self.realgoal: ", self.realgoal) # print("curr quat: ", current_quaternion) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(quat_error, -1), np.reshape(ee_velocities, -1), ] #print("quat_error: ", quat_error) #print("ee_points:", ee_points) return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(quat_error, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 # rmse_trans = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # rmse_orient = self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # # print("rmse_orient: ", self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # self.reward_dist = -rmse_trans # self.reward_orient = -rmse_orient # # + self.ob[(self.scara_chain.getNrOfJoints()+4)] ) # # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 # if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): # self.reward_final_dist = 1 + self.reward_dist # Make the reward increase as the distance decreases # print("Reward Pose is: ", self.reward_final_dist) # else: # self.reward_final_dist = self.reward_dist self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) self.reward_orient = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints() + 3:(self.scara_chain.getNrOfJoints() + 7)]) # print("self.reward_orient: ", self.reward_orient) #scale here the orientation because it should not be the main bias of the reward, position should be orientation_scale = 0.2 # # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.005): self.reward = 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) # Make the reward increase as the distance decreases print("Reward position is: ", self.reward) else: self.reward = self.reward_dist # if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints() + 3:( self.scara_chain.getNrOfJoints() + 7)]) < 0.1): self.reward = self.reward + orientation_scale * ( 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints() + 3: (self.scara_chain.getNrOfJoints() + 7)])) print("Reward orientation is: ", self.reward) else: self.reward = self.reward + orientation_scale * self.rmse_func( self.ob[self.scara_chain.getNrOfJoints() + 3: (self.scara_chain.getNrOfJoints() + 7)]) #this is very hard to converge # self.reward = self.reward_dist + orientation_scale*self.reward_orient # # if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])<0.1 and self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): # self.reward = 10.*((1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])) + orientation_scale * (1 -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]))) # print("Reward hit the target, and is: ", self.reward) # # # self.reward = self.reward_final_dist + orientation_scale*self.final_rew_orient # self.reward =self.reward - abs(self.ob[(self.scara_chain.getNrOfJoints()+4)]) # Calculate if the env has been solved done = bool(((abs(self.reward_dist) < 0.005) and (abs(self.reward_orient)) < 0.1) or (self.iterator > self.max_episode_steps)) # Execute "action" self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # this is workout for the collision enviroment, does not work properly till now. # if done: # self.goToInit() # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def goToInit(self): self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions'][ 'initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action - now_action, float(np.inf)) self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) time.sleep(3) def _reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboModularScara3DOFv2Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and ROS 2 interfaces, for now communicating throught a bridge. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0.0, 0.0, 0.0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 1000000 # 1 is real life simulation # slowness = 1 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" #URDF_PATH = "../assets/urdf/modular_scara/scara_e1_3joints.urdf" import rospkg r = rospkg.RosPack() path = r.get_path('scara_e1_description') URDF_PATH = path + "/urdf/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed() def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message # print("!!!!!!!!!Received an observation") # def discretize_observation(self,data,new_ranges): # discretized_ranges = [] # min_range = 0.2 # done = False # mod = len(data.ranges)/new_ranges # for i, item in enumerate(data.ranges): # if (i%mod==0): # if data.ranges[i] == float ('Inf') or np.isinf(data.ranges[i]): # discretized_ranges.append(6) # elif np.isnan(data.ranges[i]): # discretized_ranges.append(0) # else: # discretized_ranges.append(int(data.ranges[i])) # if (min_range > data.ranges[i] > 0): # done = True # return discretized_ranges,done def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. # target.time_from_start.secs = self.environment['slowness'] target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(3) # Initialize a joint array for the present 6 joint angles. angles = JntArray(3) # Construct the joint array from the most recent joint angles. for i in range(3): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, 3) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) # def take_observation_ros2(self): # """ # Take observation from the environment and return it. # TODO: define return type # """ # # Take an observation # if rclpy.ok(): # # Only read and process ROS messages if they are fresh. # # TODO: review, robot_id seems specific to Risto's implementation # # # # Acquire the lock to prevent the subscriber thread from # # # updating times or observation messages. # # self._time_lock.acquire(True) # obs_message = self._observation_msg # # # Collect the end effector points and velocities in # # cartesian coordinates for the state. # # Collect the present joint angles and velocities from ROS for the state. # last_observations = self.process_observations(obs_message, self.environment) # if last_observations is None: # print("last_observations is empty") # else: # # # # Get Jacobians from present joint angles and KDL trees # # # # The Jacobians consist of a 6x6 matrix getting its from from # # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) # ee_link_jacobians = self.get_jacobians(last_observations) # if self.environment['link_names'][-1] is None: # print("End link is empty!!") # else: # # print(self.environment['link_names'][-1]) # trans, rot = forward_kinematics(self.scara_chain, # self.environment['link_names'], # last_observations[:3], # base_link=self.environment['link_names'][0], # end_link=self.environment['link_names'][-1]) # # # # rotation_matrix = np.eye(4) # rotation_matrix[:3, :3] = rot # rotation_matrix[:3, 3] = trans # # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # # # current_quaternion = np.array([angle]+dir.tolist())# # # # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # # current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], # trans, # rot).T) # ee_points = current_ee_tgt - self.environment['ee_points_tgt'] # # ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, # self.environment['end_effector_points'], # rot) # ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, # self.environment['end_effector_points'], # rot, # last_observations) # # # # # Concatenate the information that defines the robot state # # vector, typically denoted asrobot_id 'x'. # state = np.r_[np.reshape(last_observations, -1), # np.reshape(ee_points, -1), # np.reshape(ee_velocities, -1),] # # return np.r_[np.reshape(last_observations, -1), # np.reshape(ee_points, -1), # np.reshape(ee_velocities, -1),] # else: # print("Observation is None") # return None def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False # # ROS 2, Acquire the lock to prevent the subscriber thread from # # updating times or observation messages. # self._time_lock.acquire(True) obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) #print("last_observations", last_observations)###### # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:3], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) #print("trans", trans)###### # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) # print("RMSE", rmse) ##### return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - observation - reward - done (status) - dictionary (#TODO clarify) """ # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Execute "action" # if rclpy.ok(): # ROS 2 code #print("self.get_trajectory_message(action[:3])", self.get_trajectory_message(action[:3]))######### self._pub.publish(self.get_trajectory_message(action[:3])) #TODO: wait until action gets executed # time.sleep(int(self.environment['slowness'])) # using seconds time.sleep(int(self.environment['slowness']) / 1000000000) # using nanoseconds #print("Before action observation", self.ob[:3])####### # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() #print("action", action[:3])####### #print("After action observation", self.ob[:3])####### # Pause simulation rospy.wait_for_service('/gazebo/pause_physics') try: #resp_pause = pause.call() self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # # Old implementation for reward calculation # # Calculate reward based on observation # if np.linalg.norm(self.ob[1]) < 0.005: # self.reward_dist = 1000.0 * np.linalg.norm(self.ob[1])#- 10.0 * np.linalg.norm(ee_points) # # self.reward_ctrl = np.linalg.norm(action)#np.square(action).sum() # done = True # print("self.reward_dist: ", self.reward_dist, "self.reward_ctrl: ", self.reward_ctrl) # else: # self.reward_dist = - np.linalg.norm(self.ob[1]) # # self.reward_ctrl = - np.linalg.norm(action)# np.square(action).sum() # # self.reward = 2.0 * self.reward_dist + 0.01 * self.reward_ctrl # #removed the control reward, maybe we should add it later. # self.reward = self.reward_dist # Calculate the reward # Heuristic used for the calculation of the reward: # - calculate the residual mean square error (rmse) between the current # end effector point and the target point # - the reward is the result of the difference between 2 and the rmse absolute value # this means that the reward increase as the distance decreases. # print("ob :", self.ob) # print("obs[3:6] :", self.ob[3:6]) self.reward_dist = self.rmse_func(self.ob[3:6]) # print("reward_dist :", self.reward_dist) self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # print("reward: ", self.reward) # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) # self._time_lock.release() # ROS 2 code # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def reset(self): """ Reset the agent for a particular experiment condition. """ # Resets the state of the environment and returns an initial observation. rospy.wait_for_service('/gazebo/reset_simulation') try: #reset_proxy.call() self.reset_proxy() except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Go to initial position and wait until it arrives there self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) ## time.sleep(int(self.environment['slowness'])) # using seconds # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds time.sleep(1) # # Use moveit to return to the original position # import moveit_commander # import moveit_msgs.msg # import geometry_msgs.msg # # print "============ Starting tutorial setup" # moveit_commander.roscpp_initialize(sys.argv) # # rospy.init_node('move_group_python_interface_tutorial', # # anonymous=True) # robot = moveit_commander.RobotCommander() # scene = moveit_commander.PlanningSceneInterface() # group = moveit_commander.MoveGroupCommander("scara_arm") # group.clear_pose_targets() # roup_variable_values = group.get_current_joint_values() # # print "============ Joint values: ", group_variable_values # group_variable_values[0] = 0.0 # group_variable_values[1] = 0.0 # group_variable_values[2] = 0.0 # group.set_joint_value_target(group_variable_values) # plan2 = group.plan() # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Pause the simulation rospy.wait_for_service('/gazebo/pause_physics') try: self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Return the corresponding observation return self.ob
class GazeboModularScara3DOFv4Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self, reward_type='sparse', distance_threshold=0.005, n_substeps=20, relative_control=False): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step self._max_episode_steps = self.max_episode_steps # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to 1ms, since ddpg is slow self.slowness = 1000000 self.slowness_unit = 'nsec' self.reset_jnts = True self.choose_robot = 0 self.reward_type = reward_type self.distance_threshold = distance_threshold self.n_substeps = n_substeps self.relative_control = relative_control ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H # EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.3746]) #center of S EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) # Seed the environment self._seed() # self.max_steps_episode = 1000 def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746 ]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746 ]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeMultipleTargets(self): print("calling randomize multiple targets") EE_POS_TGT_1 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.0985179, 0.3746]) # center of H right EE_POS_TGT_3 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_4 = np.asmatrix([0.3355224, 0.0344309, 0.3746]) # center of O left EE_POS_TGT_5 = np.asmatrix([0.3013209, 0.1647450, 0.3746]) # S top right EE_POS_TGT_6 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S midlle EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 ee_pos_tgt_3 = EE_POS_TGT_3 ee_pos_tgt_4 = EE_POS_TGT_4 ee_pos_tgt_5 = EE_POS_TGT_5 ee_pos_tgt_6 = EE_POS_TGT_6 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) target3 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_3, ee_rot_tgt).T) target4 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_4, ee_rot_tgt).T) target5 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_5, ee_rot_tgt).T) target6 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_6, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ all_targets = [target1, target2, target3, target4, target5, target6] self.realgoal = random.choice(all_targets) # self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeCorrect realgoal: ", self.realgoal) def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] # return np.r_[np.reshape(last_observations, -1), # np.reshape(ee_points, -1), # np.reshape(ee_velocities, -1),] return { 'observation': np.reshape(last_observations, -1), 'achieved_goal': current_ee_tgt, 'desired_goal': self.realgoal, 'distance_from_goal': ee_points, } def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def goal_distance(self, goal_a, goal_b): assert goal_a.shape == goal_b.shape # print("goal_distance", np.linalg.norm(goal_a - goal_b, axis=-1)) return np.linalg.norm(goal_a - goal_b, axis=-1) def compute_reward(self, achieved_goal, desired_goal, info): # Compute distance between goal and the achieved goal. d = self.goal_distance(achieved_goal, desired_goal) print("compute_reward:", d) if self.reward_type == 'sparse': return -(d > self.distance_threshold).astype(np.float32) else: return -d def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 self.reward_dist = -self.rmse_func(self.ob['distance_from_goal']) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob['distance_from_goal']) < 0.005): self.reward = 1 - self.rmse_func( self.ob['distance_from_goal'] ) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist done = (bool(abs(self.reward_dist) < 0.005)) or ( self.iterator > self.max_episode_steps) self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # print("in step, action: ", action[:3]) # print("in step, observation: ", self.ob[:3]) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def _reset(self): self.iterator = 0 if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboModularScara4And3DOFv1Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara4_3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self.urdf_path = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_model_4_and_3joints.urdf" self.slowness = 1 self.slowness_unit = 'sec' #self.slowness = 10000000 #self.slowness_unit = 'nsec' #choose random enviroment at the startup self.randomizeRobot() # Seed the environment self._seed() def init_3dof_robot(self): print("I am in enviroment 3DOF") self._observation_msg = None print(self._observation_msg) self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 self._time_lock = threading.RLock() self.choose_robot = 0 self.environment = None ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': self.urdf_path, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Topics for the robot publisher and subscriber. # self.enviroment = None JOINT_PUBLISHER = '/scara_controller_3dof/command' JOINT_SUBSCRIBER = '/scara_controller_3dof/state' # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub_3dof = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub_3dof = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) def init_4dof_robot(self): print("I am in enviroment 4DOF") self._observation_msg_4dof = None print(self._observation_msg_4dof) self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 self.reset_jnts = False self._time_lock = threading.RLock() self.choose_robot = 1 self.environment = None ############################# # Environment hyperparams ############################# ######################################################################################## ''' Add here the stuff needed for the 4DOF robot ''' EE_POS_TGT_4DOF = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT_4DOF = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS_4DOF = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES_4DOF = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS_4DOF = np.array([0., 0., 0., 0.]) # JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command' # JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state' # joint names: MOTOR1_4DOF_JOINT = 'motor1_4dof' MOTOR2_4DOF_JOINT = 'motor2_4dof' MOTOR3_4DOF_JOINT = 'motor3_4dof' MOTOR4_4DOF_JOINT = 'motor4_4dof' # Set constants for links BASE_4DOF = '4_dof_scara_e1_base_link' BASE_4DOF_MOTOR = '4_dof_scara_e1_base_motor' SCARA_4DOF_MOTOR1 = '4_dof_scara_e1_motor1' SCARA_4DOF_INSIDE_MOTOR1 = '4_dof_scara_e1_motor1_inside' SCARA_4DOF_SUPPORT_MOTOR1 = '4_dof_scara_e1_motor1_support' SCARA_4DOF_BAR_MOTOR1 = '4_dof_scara_e1_bar1' SCARA_4DOF_FIXBAR_MOTOR1 = '4_dof_scara_e1_fixbar1' SCARA_4DOF_MOTOR2 = '4_dof_scara_e1_motor2' SCARA_4DOF_INSIDE_MOTOR2 = '4_dof_scara_e1_motor2_inside' SCARA_4DOF_SUPPORT_MOTOR2 = '4_dof_scara_e1_motor2_support' SCARA_4DOF_BAR_MOTOR2 = '4_dof_scara_e1_bar2' SCARA_4DOF_FIXBAR_MOTOR2 = '4_dof_scara_e1_fixbar2' SCARA_4DOF_MOTOR3 = '4_dof_scara_e1_motor3' SCARA_4DOF_INSIDE_MOTOR3 = '4_dof_scara_e1_motor3_inside' SCARA_4DOF_SUPPORT_MOTOR3 = '4_dof_scara_e1_motor3_support' SCARA_4DOF_BAR_MOTOR3 = '4_dof_scara_e1_bar3' SCARA_4DOF_FIXBAR_MOTOR3 = '4_dof_scara_e1_fixbar3' SCARA_4DOF_MOTOR4 = '4_dof_scara_e1_motor4' SCARA_4DOF_INSIDE_MOTOR4 = '4_dof_scara_e1_motor4_inside' SCARA_4DOF_SUPPORT_MOTOR4 = '4_dof_scara_e1_motor4_support' SCARA_4DOF_BAR_MOTOR4 = '4_dof_scara_e1_bar4' SCARA_4DOF_FIXBAR_MOTOR4 = '4_dof_scara_e1_fixbar4' SCARA_4DOF_RANGEFINDER = '4_dof_scara_e1_rangefinder' EE_LINK_4DOF = '4_dof_ee_link' JOINT_ORDER_4DOF = [ MOTOR1_4DOF_JOINT, MOTOR2_4DOF_JOINT, MOTOR3_4DOF_JOINT, MOTOR4_4DOF_JOINT ] LINK_NAMES_4DOF = [ BASE_4DOF, BASE_4DOF_MOTOR, SCARA_4DOF_MOTOR1, SCARA_4DOF_INSIDE_MOTOR1, SCARA_4DOF_SUPPORT_MOTOR1, SCARA_4DOF_BAR_MOTOR1, SCARA_4DOF_FIXBAR_MOTOR1, SCARA_4DOF_MOTOR2, SCARA_4DOF_INSIDE_MOTOR2, SCARA_4DOF_SUPPORT_MOTOR2, SCARA_4DOF_BAR_MOTOR2, SCARA_4DOF_FIXBAR_MOTOR2, SCARA_4DOF_MOTOR3, SCARA_4DOF_INSIDE_MOTOR3, SCARA_4DOF_SUPPORT_MOTOR3, SCARA_4DOF_BAR_MOTOR3, SCARA_4DOF_FIXBAR_MOTOR3, SCARA_4DOF_MOTOR4, SCARA_4DOF_INSIDE_MOTOR4, SCARA_4DOF_SUPPORT_MOTOR4, EE_LINK_4DOF ] reset_condition = { 'initial_positions': INITIAL_JOINTS_4DOF, 'initial_velocities': [] } ################################################################################################################################################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" m_joint_order = copy.deepcopy(JOINT_ORDER_4DOF) m_link_names = copy.deepcopy(LINK_NAMES_4DOF) ee_pos_tgt = EE_POS_TGT_4DOF ee_rot_tgt = EE_ROT_TGT_4DOF # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS_4DOF, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'ee_points_tgt': self.realgoal, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': self.urdf_path, # 'joint_publisher': m_joint_publishers, # 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS_4DOF, 'end_effector_velocities': EE_VELOCITIES_4DOF, } # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Topics for the robot publisher and subscriber. # self.enviroment = None JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command' JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state' # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub_4dof = rospy.Publisher('/scara_controller_4dof/command', JointTrajectory) self._sub_4dof = rospy.Subscriber('/scara_controller_4dof/state', JointTrajectoryControllerState, self.observation_callback_4dof) def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def observation_callback_4dof(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg_4dof = message #def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True): def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=False): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def randomizeTarget(self): print("calling randomize target") if self.choose_robot is 0: print("Randomize target for the 3 DoF") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) else: print("Randomize target for the 4 DoF") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeRobot(self): print("calling randomize correct") #try to choose environment if np.random.uniform() < 0.5: self.choose_robot = 0 print("which robot is? ", self.choose_robot) enviroment_test = self.init_3dof_robot() else: self.choose_robot = 1 print("which robot is?", self.choose_robot) enviroment_test = self.init_4dof_robot() def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # print('joint messages: ', message.joint_names) # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False if self.choose_robot is 0: obs_message = self._observation_msg else: obs_message = self._observation_msg_4dof if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state if self.choose_robot is 0: last_observations = np.insert(last_observations, 3, 0.) # print('last_observations_extension: ', last_observations) # last_observations.append(0.0) return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.005): self.reward = 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) or (self.iterator > self.max_episode_steps) # Execute "action" if self.choose_robot is 0: self._pub_3dof.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) else: self._pub_4dof.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. ee_point = self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)] + self.realgoal ee_point_eucledian = np.linalg.norm( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) return self.ob, self.reward, done, {}, ee_point, ee_point_eucledian def _reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 if self.reset_jnts is True: if self.choose_robot is 0: self._pub_3dof.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) elif self.choose_robot is 1: self._pub_4dof.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class Box3DOFv1Env(gazebo_env.GazeboEnv): def __init__(self): # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._camera_image = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0, 0, 0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 2 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image, self.camera_callback) #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # Seed the environment self._seed() model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message # print("!!!!!!!!!Received an observation") def camera_callback(self, message): """ Callback method for the subscriber of Image """ self._camera_image = message print("\ncamera_image.height", self._camera_image.height) print("\ncamera_image.width", self._camera_image.width) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. target.time_from_start.secs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(3) # Initialize a joint array for the present 6 joint angles. angles = JntArray(3) # Construct the joint array from the most recent joint angles. for i in range(3): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, 3) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def addObstacle(self): print("\nADD OBSTACLE") rospy.wait_for_service('/gazebo/spawn_urdf_model') try: # #NORMAL BOX model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <box size=\".05 .05 .05\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"red\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"10.\"/>\ <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\ </inertial>\ <collision>\ <geometry>\ <box size=\".05 .05 .05\"/>\ </geometry>\ </collision>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0.3\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/Red</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() number = random.randint(0, 4) #pose.position.x = 0.15;# pose.position.x = 0.3 # pose.position.y = 0.07 # pose.position.z = 0.05 pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" self.add_model(model_name="obstacle", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame="") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False # # ROS 2, Acquire the lock to prevent the subscriber thread from # # updating times or observation messages. # self._time_lock.acquire(True) obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:3], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - observation - reward - done (status) - dictionary (#TODO clarify) """ # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Execute "action" # if rclpy.ok(): # ROS 2 code self._pub.publish(self.get_trajectory_message(action[:3])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Pause simulation rospy.wait_for_service('/gazebo/pause_physics') try: #resp_pause = pause.call() self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # # Old implementation for reward calculation # # Calculate reward based on observation # if np.linalg.norm(self.ob[1]) < 0.005: # self.reward_dist = 1000.0 * np.linalg.norm(self.ob[1])#- 10.0 * np.linalg.norm(ee_points) # # self.reward_ctrl = np.linalg.norm(action)#np.square(action).sum() # done = True # print("self.reward_dist: ", self.reward_dist, "self.reward_ctrl: ", self.reward_ctrl) # else: # self.reward_dist = - np.linalg.norm(self.ob[1]) # # self.reward_ctrl = - np.linalg.norm(action)# np.square(action).sum() # # self.reward = 2.0 * self.reward_dist + 0.01 * self.reward_ctrl # #removed the control reward, maybe we should add it later. # self.reward = self.reward_dist # Calculate the reward # Heuristic used for the calculation of the reward: # - calculate the residual mean square error (rmse) between the current # end effector point and the target point # - in the case it's bigger than 5 mm, reward is the negative value of the rmse # - in case where it's smaller than 5 mm, reward is calculated by substracting 100 - rmse # print(self.ob[2]) self.reward_dist = -self.rmse_func(self.ob[2]) if abs(self.reward_dist) < 0.005: # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist) self.reward = 100 + self.reward_dist else: # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist) self.reward = self.reward_dist # print("reward: ", self.reward) # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) # self._time_lock.release() # ROS 2 code # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def _reset(self): """ Reset the agent for a particular experiment condition. """ # Resets the state of the environment and returns an initial observation. rospy.wait_for_service('/gazebo/reset_simulation') try: #reset_proxy.call() self.reset_proxy() except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Pause the simulation rospy.wait_for_service('/gazebo/pause_physics') try: self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Return the corresponding observation return self.ob
class GazeboNewArticulatedv1Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "NewArticulatedArm_v1.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.2, 0.2, 0.2868]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' # Set constants for links BASE = 'new_arm_base_link' NEW_ART_MOTOR1 = 'motor1_link' NEW_ART_MOTOR2 = 'link1' NEW_ART_MOTOR3 = 'motor3_link' NEW_ART_MOTOR4 = 'motor4_link' NEW_ART_MOTOR5 = 'motor5_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT ] LINK_NAMES = [ BASE, NEW_ART_MOTOR1, NEW_ART_MOTOR2, NEW_ART_MOTOR3, NEW_ART_MOTOR4, NEW_ART_MOTOR5 ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/new_articulated/new_arm.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/new_arm_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/new_arm_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # # Gazebo specific services to start/stop its behavior and # # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) # model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment # Seed the environment self._seed() def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.2868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.2868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeMultipleTargets(self): print("calling randomize multiple target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.2868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.2868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # # # Calculate if the env has been solved # done = bool(abs(self.reward_dist) < 0.005) self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.005): self.reward = 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("reward: ", self.reward) # print("rmse_func: ", self.rmse_func(ee_points)) # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) or (self.iterator > self.max_episode_steps) # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Execute "action" self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) #TODO: wait until action gets executed # When adding this the algorithm does not converge # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def goToInit(self): self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions'][ 'initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action - now_action, float(np.inf)) self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) time.sleep(3) def _reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) # time.sleep(int(self.environment['slowness'])) # using seconds # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # time.sleep(int(self.environment['slowness'])) if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class ARIACPickv0Env(gazebo_env.GazeboEnv): def __init__(self): self.slowness = 1 self.slowness_unit = 'sec' # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ARIACPick_v0.launch") JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # Set constants for joints SHOULDER_PAN_JOINT = 'shoulder_pan_joint' SHOULDER_LIFT_JOINT = 'shoulder_lift_joint' ELBOW_JOINT = 'elbow_joint' WRIST_1_JOINT = 'wrist_1_joint' WRIST_2_JOINT = 'wrist_2_joint' WRIST_3_JOINT = 'wrist_3_joint' # Set constants for links BASE = 'base' BASE_LINK = 'base_link' SHOULDER_LINK = 'shoulder_link' UPPER_ARM_LINK = 'upper_arm_link' FOREARM_LINK = 'forearm_link' WRIST_1_LINK = 'wrist_1_link' WRIST_2_LINK = 'wrist_2_link' WRIST_3_LINK = 'wrist_3_link' EE_LINK = 'ee_link' JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT, WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT] # JOINT_ORDER= ['linear_arm_actuator_joint', 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint'] LINK_NAMES = [BASE, BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK, WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK, EE_LINK] LINK_NAMES2 = [ BASE, BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK, WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK, EE_LINK ] EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) INITIAL_JOINTS = np.array([0., 0., 0.]) STEP_COUNT = 2 # Typically 100. reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/erle/ros_scara/ur10.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } rospy.wait_for_service('/ariac/gripper/control') self.vacuum_gripper_control = rospy.ServiceProxy('/ariac/gripper/control', VacuumGripperControl) self._pub = rospy.Publisher('/ariac/arm/command', JointTrajectory) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): state = [] reward = [] done = False data_arm_state = None while data_arm_state is None: try: data_arm_state = rospy.wait_for_message('/ariac/arm/state', JointTrajectoryControllerState, timeout=5) except: pass data_arm_state2 = JointTrajectoryControllerState() data_arm_state2.joint_names = data_arm_state.joint_names[1:] data_arm_state2.desired.positions = data_arm_state.desired.positions[1:] data_arm_state2.desired.velocities = data_arm_state.desired.velocities[1:] data_arm_state2.desired.accelerations = data_arm_state.desired.accelerations[1:] data_arm_state2.actual.positions = data_arm_state.actual.positions[1:] data_arm_state2.actual.velocities = data_arm_state.actual.velocities[1:] data_arm_state2.actual.accelerations = data_arm_state.actual.accelerations[1:] data_arm_state2.error.positions = data_arm_state.error.positions[1:] data_arm_state2.error.velocities = data_arm_state.error.velocities[1:] data_arm_state2.error.accelerations = data_arm_state.error.accelerations[1:] # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(data_arm_state2, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] self._pub.publish(self.get_trajectory_message([0.018231524968342513, 3.2196073949389703 + np.random.rand(1)[0] - .5, -0.6542426695478509 + np.random.rand(1)[0] - .5, 1.7401498071871018 + np.random.rand(1)[0] - .5, 3.7354939354077596 + np.random.rand(1)[0] - .5, -1.510707301072792 + np.random.rand(1)[0] - .5, 0.00014565660628829136 + np.random.rand(1)[0] - .5])) data_vacuum_state = None while data_vacuum_state is None: try: data_vacuum_state = rospy.wait_for_message('/ariac/gripper/state', VacuumGripperState, timeout=5) except: pass data_camera = None while data_camera is None: try: data_camera = rospy.wait_for_message('/ariac/logical_camera_1', LogicalCameraImage, timeout=5) except: pass try: self.vacuum_gripper_control(enable=bool(state[0])) except (rospy.ServiceException) as e: print ("/gazebo/reset_simulation service call failed") state = [data_arm_state, data_camera, data_vacuum_state] return state, reward, done, {} def reset(self): data_arm_state = None while data_arm_state is None: try: data_arm_state = rospy.wait_for_message('/ariac/arm/state', JointTrajectoryControllerState, timeout=5) except: pass data_camera = None while data_camera is None: try: data_camera = rospy.wait_for_message('/ariac/logical_camera_1', LogicalCameraImage, timeout=5) except: pass state = [data_arm_state, data_camera] return state def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = copy.deepcopy(self.environment['joint_order']) action_msg.joint_names.insert(0, 'linear_arm_actuator_joint') # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty"); # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all(map(lambda x,y: x in y, message.joint_names, [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array(message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1)
class GazeboModularScara3DOFStaticObstaclev0Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" #URDF_PATH = "/home/erle/scara_e1_description/urdf/scara_e1_3joints.urdf" URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0,0]; pose.position.y = EE_POS_TGT[0,1]; pose.position.z = 0.055#EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0; pose.orientation.y= 0; pose.orientation.z = 0; pose.orientation.w = 0; reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment self._seed() # self.max_steps_episode = 1000 def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=False): self.slowness = slowness self.slowness_unit = slowness_unit print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) def randomizeObstacle(self): print("calling randomize obstacle") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ #self.realgoal = target1 if np.random.uniform() < 0.5 else target2 if np.random.uniform() < 0.5: self.addObstacle() self.realgoal = target2 print("\n\nOBSTACLE\n\n") else: self.removeObstacle() self.realgoal = target1 print("\n\nNO OBSTACLE\n\n") print("randomizeObstacle realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H #def randomizeCorrect(self): def randomizeTarget(self): print("calling randomize correct") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty"); # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all(map(lambda x,y: x in y, message.joint_names, [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array(message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator+=1 # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # # # Calculate if the env has been solved # done = bool(abs(self.reward_dist) < 0.005) # reward as in the enviroment for 4DOF commented out self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("reward: ", self.reward) # print("rmse_func: ", self.rmse_func(ee_points)) # # enviroment V2 reward # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # Calculate if the env has been solved done = (bool(abs(self.reward_dist) < 0.005)) or (self.iterator > self.max_episode_steps) # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Execute "action" # if rclpy.ok(): # ROS 2 code self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) #TODO: wait until action gets executed # When adding this the algorithm does not converge # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # print("in step, action: ", action[:3]) # print("in step, observation: ", self.ob[:3]) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def addObstacle(self): rospy.wait_for_service('/gazebo/spawn_urdf_model') # <material name=\"green\">\ # <color rgba=\"0 0.8 .8 1\"/>\ # </material>\ try: model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <box size=\".04 .04 1\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"blue\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ <collision>\ <geometry>\ <box size=\".04 .04 1\"/>\ </geometry>\ <contact>\ <ode>\ <soft_cfm>0.000000</soft_cfm>\ <soft_erp>0.200000</soft_erp>\ <kp>1000000000000.000000</kp>\ <kd>1.000000</kd>\ <max_vel>100.000000</max_vel>\ <min_depth>0.00001000</min_depth>\ </ode>\ </contact>\ </collision>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/Blue</material>\ </gazebo>\ </robot>" #BOX # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"box0\">\ # <visual>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"blue\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia>\ # <ixx>1.0</ixx>\ # <ixy>0</ixy>\ # <ixz>0</ixz>\ # <iyy>1.0</iyy>\ # <iyz>0</iyz>\ # <izz>1.0</izz>\ # </inertia>\ # </inertial>\ # <collision>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # </collision>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"box0\"/>\ # </joint>\ # <gazebo reference=\"box0\">\ # <material>Gazebo/Blue</material>\ # </gazebo>\ # </robot>" # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"cylinder0\">\ # <visual>\ # <geometry>\ # <sphere radius=\"0.01\"/>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"rojotransparente\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ # </inertial>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"cylinder0\"/>\ # </joint>\ # <gazebo reference=\"cylinder0\">\ # <material>Gazebo/GreenTransparent</material>\ # </gazebo>\ # </robot>" robot_namespace = "" pose = Pose() number = random.randint(0, 4) # if number == 1: # pose.position.x = 0.25; # pose.position.y = 0.07; # if number == 2: # pose.position.x = 0.25; # pose.position.y = -0.07; # if number == 3: # pose.position.x = 0.23; # pose.position.y = -0.087; # if number == 4: # pose.position.x = 0.23; # pose.position.y = 0.087; pose.position.x = 0.25;# pose.position.y = 0.07;# pose.position.z = 0.0; pose.orientation.x = 0; pose.orientation.y= 0; pose.orientation.z = 0; pose.orientation.w = 0; reference_frame = "" if number > -1: self.add_model(model_name="obstacle", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame="") except (rospy.ServiceException) as e: print ("/gazebo/spawn_urdf_model service call failed") def removeObstacle(self): rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(model_name="obstacle") except (rospy.ServiceException) as e: print ("/gazebo/spawn_urdf_model service call failed") def _reset(self): # """ # Reset the agent for a particular experiment condition. # """ # # Resets the state of the environment and returns an initial observation. # rospy.wait_for_service('/gazebo/reset_simulation') # try: # #reset_proxy.call() # self.reset_proxy() # except (rospy.ServiceException) as e: # print ("/gazebo/reset_simulation service call failed") #., # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Go to initial position and wait until it arrives there # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause the simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # try: # self.remove_model(model_name="obstacle") # except (rospy.ServiceException) as e: # print ("/gazebo/spawn_urdf_model service call failed") #remove previous spawned model in order to avoid respawning model with same name # rospy.wait_for_service('/gazebo/delete_model') # try: # self.remove_model(model_name="obstacle") # except (rospy.ServiceException) as e: # print ("/gazebo/spawn_urdf_model service call failed") #remove previous spawned model in order to avoid respawning model with same name self.iterator = 0 msg = self.get_trajectory_message(self.environment['reset_conditions']['initial_positions']) msg.points[0].time_from_start.secs = 0 msg.points[0].time_from_start.nsecs = 10000 self._pub.publish(msg) # time.sleep(int(self.environment['slowness'])) # using seconds # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif(self.slowness_unit == 'nsec'): time.sleep(int(self.slowness/1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class RealModularMara3DOFv0Env(real_env.RealEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name real_env.RealEnv.__init__(self) # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.detect_target_once = 1 self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([-0.48731417, -0.00258965, 0.82467501]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'hros_actuation_servomotor_000A3500014E2' MOTOR2_JOINT = 'hros_actuation_servomotor_000A3500014E' MOTOR3_JOINT = 'hros_actuation_servomotor_000A3500018E' MOTOR4_JOINT = 'hros_actuation_servomotor_000A3500018E2' MOTOR5_JOINT = 'hros_actuation_servomotor_000A3500016E' MOTOR6_JOINT = 'hros_actuation_servomotor_000A3500016E2' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT] LINK_NAMES = [TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = '/home/erle/ros_catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf' m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) TARGET_SUBSCRIBER = '/mara/target' self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose, self.tgt_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ self.obs_dim = self.scara_chain.getNrOfJoints() + 10 #6 hardcode it for now # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.pub_set_model = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=1) # Seed the environment # Seed the environment self._seed() self.kf = [] for i in range(6): # self.kf.append(KalmanFilter(initial_state_mean=0, # transition_matrices=[1], # observation_matrices=[1], # initial_state_covariance=1, # observation_covariance=5, # transition_covariance=0.001, # n_dim_obs=1, n_dim_state=1, # em_vars=['transition_covariance', 'observation_covariance', 'initial_state_covariance'])) # self.kf.append(KalmanFilter(initial_state_mean=0, # transition_matrices=[1], # observation_matrices=[1], # initial_state_covariance=1, # observation_covariance=1, # transition_covariance=0.1, # n_dim_obs=1, n_dim_state=1)) self.kf.append(RingBuffer(500)) def tgt_callback(self,msg): # print("Whats the target?: ", msg) if self.detect_target_once is 1: print("Get the target from vision, for now just use position.") EE_POS_TGT = np.asmatrix([msg.position.x, msg.position.y, msg.position.z]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt = EE_POS_TGT # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 print("Predicted target is: ", self.realgoal) self.detect_target_once = 0 def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ # index = 0 # for value in message.actual.positions: # message.actual.positions[index] = value * 3.14/180 # index = index + 1 self._observation_msg = message def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] index = 0 # print(self.kf) print(action_float) for value in action_float: # # self.kf[index] = self.kf[index].em(value, n_iter=5) # (filtered_state_means, filtered_state_covariances) = self.kf[index].filter(value) # (smoothed_state_means, smoothed_state_covariances) = self.kf[index].smooth(value) # action_float[index] = smoothed_state_means self.kf[index].append(value) action_float[index] = self.kf[index].mean() index = index + 1 print(action_float) print('---------------------') target.positions = action_float target.velocities = [0]*6 target.effort = [float('nan')]*6 # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty"); # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all(map(lambda x,y: x in y, message.joint_names, [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array(message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) tgt_quartenion = quaternion_from_matrix(self.target_orientation) A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T quat_error = current_quaternion - tgt_quartenion current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(quat_error, -1), np.reshape(ee_velocities, -1),] return state def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator+=1 self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) self.reward_orient = - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) #scale here the orientation because it should not be the main bias of the reward, position should be orientation_scale = 0.01 # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])<0.05): self.reward = self.reward + orientation_scale * (1 -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)])) print("Reward orientation is: ", self.reward) else: self.reward = self.reward + orientation_scale * self.rmse_func(self.ob[self.scara_chain.getNrOfJoints()+3:(self.scara_chain.getNrOfJoints()+7)]) # Calculate if the env has been solved done = bool(((abs(self.reward_dist) < 0.005) and (abs(self.reward_orient)) < 0.05) or (self.iterator>self.max_episode_steps)) # Execute "action" self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def publish_last_position(self, values): self._pub.publish(self.get_trajectory_message(values[:self.scara_chain.getNrOfJoints()])) def goToInit(self): self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions']['initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action-now_action, float(np.inf)) self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) time.sleep(3) def reset(self): """ Reset the agent for a particular experiment condition. """ self.iterator = 0 if self.reset_jnts is True: self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif(self.slowness_unit == 'nsec'): time.sleep(int(self.slowness/1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
class GazeboMARATopOrientCollisionv0Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._collision_msg = None self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466]) # alex2 # EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar # EE_ROT_TGT = np.asmatrix([[-0.00128296, 0.9999805 , 0.00611158], # [ 0.00231397, -0.0061086 , 0.99997867], # [ 0.9999965 , 0.00129708, -0.00230609]]) # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # original orientation EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' RAND_LIGHT_PUBLISHER = '/randomizers/randomizer/light' RAND_SKY_PUBLISHER = '/randomizers/randomizer/sky' RAND_PHYSICS_PUBLISHER = '/randomizers/randomizer/physics' LINK_STATE_PUBLISHER = '/gazebo/set_link_state' RAND_OBSTACLES_PUBLISHER = '/randomizers/randomizer/obstacles' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) self._pub_rand_light = rospy.Publisher(RAND_LIGHT_PUBLISHER, stdEmpty) self._pub_rand_sky = rospy.Publisher(RAND_SKY_PUBLISHER, stdEmpty) self._pub_rand_physics = rospy.Publisher(RAND_PHYSICS_PUBLISHER, stdEmpty) self._pub_rand_obstacles = rospy.Publisher(RAND_OBSTACLES_PUBLISHER, stdEmpty) self._pub_link_state = rospy.Publisher(LINK_STATE_PUBLISHER, LinkState) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 9 #7 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.add_model_urdf = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.set_model_pose = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_model_state = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" self.envs_path = os.path.dirname( os.path.dirname(os.path.abspath(__file__))) file_xml = open(self.envs_path + '/assets/urdf/target/point.urdf', mode='r') model_xml = file_xml.read() file_xml.close() rospy.wait_for_service('/gazebo/spawn_urdf_model') try: self.add_model_urdf(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="world") except rospy.ServiceException as e: print('Error adding urdf model') # # self.obj_path = self.envs_path + '/assets/urdf/objs/rubik_cube/rubik_cube_random.sdf' # # self.obj_path = self.envs_path + '/assets/urdf/objs/rubik_cube/rubik_cube.sdf' # self.obj_path = self.envs_path + '/assets/urdf/objs/box.sdf' #0.067 0.067 0.067 # # self.obj_path = self.envs_path + '/assets/urdf/objs/cylinder.sdf' # radius = 0.03 length = 0.08 # # self.obj_path = self.envs_path + '/assets/urdf/objs/sphere.urdf' #radius = 0.033 # file_sdf = open(self.obj_path ,mode='r') # model_sdf = file_sdf.read() # file_sdf.close() # # rospy.wait_for_service('/gazebo/spawn_sdf_model') # # rospy.wait_for_service('/gazebo/spawn_urdf_model') # try: # # self.add_model_urdf(model_name="obj", # self.add_model_sdf(model_name="obj", # model_xml=model_sdf, # robot_namespace="", # initial_pose=pose, # reference_frame="world") # except rospy.ServiceException as e: # print('Error adding sdf model') self._seed() def collision_callback(self, message): """ Callback method for the subscriber of Collision data """ self._collision_msg = None # if message.collision1_name is not message.collision2_name: # if "objs" not in message.collision1_name and "obj" not in message.collision2_name: # if "obj" not in message.collision1_name or "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: # if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name: # self._collision_msg = message if message.collision1_name is not message.collision2_name: if "obj" not in message.collision1_name and "obj" not in message.collision2_name: if "obj" not in message.collision1_name and "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: if "robot::motor6_link::motor6_link_fixed_joint_lump__robotiq_arg2f_base_link_collision_1" not in message.collision1_name and "robot::left_outer_finger::left_outer_finger_collision" not in message.collision2_name: if "obstacle" not in message.collision1_name and "robot::table::table_fixed_joint_lump__mara_work_area_link_collision_4" not in message.collision2_name: if "obstacle" not in message.collision1_name and "obj" not in "message.collision2_name": self._collision_msg = message def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def getModelFile(self, path): if path.endswith('.sdf'): return "sdf" elif path.endswith('.urdf'): return "urdf" else: raise TypeError('the file must be .sdf or .urdf') def randomizeSize(self, current_obj_name, shape): f = open(self.obj_path, 'r+') model_xml = f.read() f.seek(0) min_size = 0.03 max_size = 0.1 if shape != 'box' and shape != 'cylinder' and shape != 'sphere': raise TypeError("Shape must be 'box', 'cylinder' or 'sphere'") if shape == "box": size_x = str(round(np.random.uniform(min_size, max_size), 3)) size_y = str(round(np.random.uniform(min_size, max_size), 3)) size_z = str(round(np.random.uniform(min_size, max_size), 3)) # height position of the wooden part of the table + height of the obj z = 0.69525 + float(size_z) / 2 else: radius = str(round(np.random.uniform(min_size, max_size / 2), 3)) z = 0.69525 + float(radius) if shape == "cylinder": length = str(round(np.random.uniform(min_size, max_size), 3)) z = 0.69525 + float(length) / 2 model_file = self.getModelFile(self.obj_path) if model_file == "sdf": if shape == "box": to_replace = model_xml[model_xml.find('<size>') + len('<size>'):model_xml.find('</size>')] model_xml = model_xml.replace( to_replace, size_x + ' ' + size_y + ' ' + size_z) else: to_replace = model_xml[model_xml.find('<radius>') + len('<radius>'):model_xml. find('</radius>')] model_xml = model_xml.replace(to_replace, radius) if shape == "cylinder": to_replace = model_xml[model_xml.find('<length>') + len('<length>'):model_xml. find('</length>')] model_xml = model_xml.replace(to_replace, length) else: if shape == "box": to_replace = model_xml[model_xml.find('<box ') + len('<box '):model_xml.find('/>')] model_xml = model_xml.replace( to_replace, 'size=' + '"' + size_x + ' ' + size_y + ' ' + size_z + '"') else: to_replace = model_xml[model_xml.find('<sphere ') + len('<sphere '):model_xml.find('/>')] model_xml = model_xml.replace(to_replace, 'radius=' + '"' + radius + '"') if shape == "cylinder": to_replace = model_xml[model_xml.find('<cylinder ') + len('<cylinder '):model_xml. find('/>')] model_xml = model_xml.replace( to_replace, 'length=' + '"' + length + '" ' + 'radius=' + '"' + radius + '"') f.truncate() f.write(model_xml) f.close() self.randomizeObjectType(current_obj_name=current_obj_name, replace=self.obj_path) self.randomizeTargetPose(obj_name=current_obj_name, centerPoint=z) def random_texture(self): material_path = self.envs_path + "/assets/urdf/Media/materials/scripts/textures.material" m = open(material_path, 'r') textures = [] for t in m: if t.startswith("material "): textures.append(t[9:-1]) rand_texture = np.random.choice(textures) return rand_texture, textures def randomizeTexture(self, current_obj_name): f = open(self.obj_path, 'r+') model_xml = f.read() f.seek(0) new_texture, list_textures = self.random_texture() for lt in list_textures: if lt != str(new_texture): model_xml = model_xml.replace(lt, new_texture) f.truncate() f.write(model_xml) f.close() self.randomizeObjectType(current_obj_name=current_obj_name, replace=self.obj_path) def randomizeObjectType(self, current_obj_name=None, list_obj=None, replace=None): obj = ModelState() rospy.wait_for_service('gazebo/get_model_state') try: obj = self.get_model_state("target", '') except rospy.ServiceException as e: print("Error getting the model state") rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(current_obj_name) except rospy.ServiceException as e: print("Error removing model") if replace is None: random_obj = np.random.choice(list_obj) self.obj_path = random_obj else: random_obj = replace obj_file = open(random_obj, mode='r') model_xml = obj_file.read() obj_file.close() model_file = self.getModelFile(random_obj) if model_file == "sdf": rospy.wait_for_service('/gazebo/spawn_sdf_model') try: self.add_model_sdf(model_name=current_obj_name, model_xml=model_xml, robot_namespace="", initial_pose=obj.pose, reference_frame="world") except rospy.ServiceException as e: print("Error adding sdf") else: rospy.wait_for_service('/gazebo/spawn_urdf_model') try: self.add_model_urdf(model_name=current_obj_name, model_xml=model_xml, robot_namespace="", initial_pose=obj.pose, reference_frame="world") except rospy.ServiceException as e: print("Error adding urdf") def randomizeStartPose(self, lower, upper): self.environment['reset_conditions']['initial_positions'] = [ np.random.uniform(lower, upper), np.random.uniform(lower, upper), np.random.uniform(lower, upper), np.random.uniform(lower, upper), np.random.uniform(lower, upper), np.random.uniform(lower, upper) ] self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) def randomizeTargetPose(self, obj_name, centerPoint=None): ms = ModelState() if centerPoint is None: EE_POS_TGT = np.asmatrix([ round(np.random.uniform(-0.62713, -0.29082), 5), round(np.random.uniform(-0.15654, 0.15925), 5), self.realgoal[2] ]) roll = 0.0 pitch = 0.0 yaw = np.random.uniform(-1.57, 1.57) q = quat.from_euler_angles(roll, pitch, yaw) EE_ROT_TGT = rot_matrix = quat.as_rotation_matrix(q) self.target_orientation = EE_ROT_TGT ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, EE_ROT_TGT).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = q.x ms.pose.orientation.y = q.y ms.pose.orientation.z = q.z ms.pose.orientation.w = q.w ms.model_name = obj_name rospy.wait_for_service('gazebo/set_model_state') try: self.set_model_pose(ms) except (rospy.ServiceException) as e: print("Error setting the pose of " + obj_name) else: EE_POS_TGT = np.asmatrix( [self.realgoal[0], self.realgoal[1], centerPoint]) ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, self.target_orientation).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = 0 ms.pose.orientation.y = 0 ms.pose.orientation.z = 0 ms.pose.orientation.w = 0 self._pub_link_state.publish( LinkState(link_name="target_link", pose=ms.pose, reference_frame="world")) self.realgoal = ee_tgt def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ obs_message = self._observation_msg if obs_message is None: return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # current_quaternion = np.array([angle]+dir.tolist())# # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # tgt_quartenion = quaternion_from_matrix(self.target_orientation) current_quaternion = quat.from_rotation_matrix(rotation_matrix) tgt_quartenion = quat.from_rotation_matrix(self.target_orientation) # A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] quat_error = current_quaternion * tgt_quartenion.conjugate() rot_vec_err = quat.as_rotation_vector(quat_error) # convert quat to np arrays quat_error = quat.as_float_array(quat_error) # RK: revisit this later, we only take one angle difference here! angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.)) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(rot_vec_err, -1), np.reshape(ee_velocities, -1), ] return state def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - action - observation - reward - done (status) """ self.iterator += 1 # if not collided: # # Execute "action" self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # careful we have degrees now so we scale with orientation_scale = 0.1 self.reward_orient = -orientation_scale * self.rmse_func( self.ob[self.scara_chain.getNrOfJoints() + 3:(self.scara_chain.getNrOfJoints() + 6)]) #scale here the orientation because it should not be the main bias of the reward, position should be collided = False if self._collision_msg is not None and self._collision_msg.collision1_name and self._collision_msg.collision2_name: # print("\ncollision detected: ", self._collision_msg) # print("Collision detected") collided = True self.reward = (self.reward_dist + self.reward_orient) * 6.0 # print("Reward collision is: ", self.reward) # Resets the state of the environment and returns an initial observation. # we should avoid this --> huge bottleneck rospy.wait_for_service('/gazebo/reset_simulation') try: self.reset_proxy() # go to the previous state before colliding # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") # self.goToInit() # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_world', Empty) else: # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 # here is the distance block if abs(self.reward_dist) < 0.01: self.reward = 1 + self.reward_dist # Make the reward increase as the distance decreases print("Reward dist is: ", self.reward) if abs(self.reward_orient) < 0.01: self.reward = 1 + self.reward + self.reward_orient print("Reward dist + orient is: ", self.reward) else: self.reward = self.reward + self.reward_orient print("Reward dist+(orient>0.01) is: ", self.reward) else: self.reward = self.reward_dist done = bool((abs(self.reward_dist) < 0.001) or (self.iterator > self.max_episode_steps) or (abs(self.reward_orient) < 0.001)) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return # return self.ob, self.reward, done, collided, {} return self.ob, self.reward, done, {} def goToInit(self): self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # # Go to initial position and wait until it arrives there # Wait until the arm is within epsilon of reset configuration. self._time_lock.acquire(True, -1) with self._time_lock: self._currently_resetting = True self._time_lock.release() if self._currently_resetting: epsilon = 1e-3 reset_action = self.environment['reset_conditions'][ 'initial_positions'] now_action = self._observation_msg.actual.positions du = np.linalg.norm(reset_action - now_action, float(np.inf)) self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) if du > epsilon: self._currently_resetting = True self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions'] ['initial_positions'])) time.sleep(3) def reset(self): """ Reset the agent for a particular experiment condition. """ # self._pub_rand_light.publish() # self._pub_rand_sky.publish() # self._pub_rand_physics.publish() # self._pub_rand_obstacles.publish() # self.randomizeTargetPose("obj") # self.randomizeTexture("obj") # self.randomizeSize("obj", "box") # common_path = self.envs_path + "/assets/urdf/objs/" # path_list = [common_path + "rubik_cube/rubik_cube_random.sdf", common_path + "rubik_cube/rubik_cube.sdf", # common_path + "box.sdf", common_path + "cylinder.sdf", common_path + "sphere.urdf"] # for pl in path_list: # if pl == self.obj_path: # path_list.remove(pl) # self.randomizeObjectType("obj", path_list) self.iterator = 0 if self.reset_jnts is True: self._pub.publish( self.get_trajectory_message( self.environment['reset_conditions']['initial_positions'])) # self.randomizeStartPose(-3.13, 3.13) # # Generate a new random start pose until it is not a colliding state # while self._collision_msg is not None: # self.randomizeStartPose(-3.13, 3.13) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob