class GazeboModularScara3DOFv3Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.choose_robot = 0 ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed() # self.max_steps_episode = 1000 def init_time(self, slowness =1, slowness_unit='sec', reset_jnts=True): self.slowness = slowness self.slowness_unit = slowness_unit self.reset_jnts = reset_jnts print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) print("reset joints: ", self.reset_jnts, "type of variable: ", type(self.reset_jnts)) def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485,0.3883636), np.random.uniform(-0.1746508,0.1701576), 0.3746]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([np.random.uniform(0.2852485,0.3883636), np.random.uniform(-0.1746508,0.1701576), 0.3746]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) def randomizeMultipleTargets(self): print("calling randomize multiple targets") EE_POS_TGT_1 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.0985179, 0.3746]) # center of H right EE_POS_TGT_3 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_4 = np.asmatrix([0.3355224, 0.0344309, 0.3746]) # center of O left EE_POS_TGT_5 = np.asmatrix([0.3013209, 0.1647450, 0.3746]) # S top right EE_POS_TGT_6 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S midlle EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 ee_pos_tgt_3 = EE_POS_TGT_3 ee_pos_tgt_4 = EE_POS_TGT_4 ee_pos_tgt_5 = EE_POS_TGT_5 ee_pos_tgt_6 = EE_POS_TGT_6 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) target3 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_3, ee_rot_tgt).T) target4 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_4, ee_rot_tgt).T) target5 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_5, ee_rot_tgt).T) target6 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_6, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ all_targets = [target1,target2,target3,target4,target5,target6] self.realgoal = random.choice(all_targets) # self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeCorrect realgoal: ", self.realgoal) def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty"); # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all(map(lambda x,y: x in y, message.joint_names, [self._valid_joint_set[robot_id] for _ in range(len(message.joint_names))])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array(message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross(ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator+=1 # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # # # Calculate if the env has been solved # done = bool(abs(self.reward_dist) < 0.005) # reward as in the enviroment for 4DOF commented out self.reward_dist = -self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if(self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)])<0.005): self.reward = 1 - self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():(self.scara_chain.getNrOfJoints()+3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("reward: ", self.reward) # print("rmse_func: ", self.rmse_func(ee_points)) # # enviroment V2 reward # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # Calculate if the env has been solved done = (bool(abs(self.reward_dist) < 0.005)) or (self.iterator > self.max_episode_steps) # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Execute "action" # if rclpy.ok(): # ROS 2 code self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) #TODO: wait until action gets executed # When adding this the algorithm does not converge # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # print("in step, action: ", action[:3]) # print("in step, observation: ", self.ob[:3]) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def _reset(self): # """ # Reset the agent for a particular experiment condition. # """ # # Resets the state of the environment and returns an initial observation. # rospy.wait_for_service('/gazebo/reset_simulation') # try: # #reset_proxy.call() # self.reset_proxy() # except (rospy.ServiceException) as e: # print ("/gazebo/reset_simulation service call failed") #., # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # Go to initial position and wait until it arrives there # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause the simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") self.iterator = 0 if self.reset_jnts is True: self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif(self.slowness_unit == 'nsec'): time.sleep(int(self.slowness/1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while(self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.choose_robot = 0 ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed()
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._collision_msg = None ### self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base' # BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' # SCARA_BAR_MOTOR1 = 'scara_e1_bar1' # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' # SCARA_BAR_MOTOR2 = 'scara_e1_bar2' # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' # SCARA_BAR_MOTOR3 = 'scara_e1_bar3' # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ SCARA_MOTOR1, SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" # URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf" #URDF_PATH = "/media/raid/RL/catkin_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf" rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) ## # self._sub_normals = rospy.Subscriber('/gazebo_normals', Vector3, self.normals_callback, queue_size=1, buff_size=2**24) ## #self._sub_coll = rospy.Subscriber('/gazebo_contacts',Bool, collision_callback) ## # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. # print("self.environment['link_names'][0]:", self.environment['link_names'][0]) # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1]) self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] print("nr of joints: ", self.scara_chain.getNrOfJoints()) # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment self._seed()
class GazeboModularScara3DOFStaticObstaclev1Env(gazebo_env.GazeboEnv): """ This environment present a modular SCARA robot with a range finder at its end pointing towards the workspace of the robot. The goal of this environment is defined to reach the center of the "H" or the "O" from the "H-ROS" logo within the worspace. This environment uses `slowness=1` and matches the delay between actions/observations to this value (slowness). In other words, actions are taken at "1/slowness" rate. Reward is determined ... (TODO: describe the heuristic or reward calculation method) """ def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._collision_msg = None ### self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base' # BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' # SCARA_BAR_MOTOR1 = 'scara_e1_bar1' # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' # SCARA_BAR_MOTOR2 = 'scara_e1_bar2' # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' # SCARA_BAR_MOTOR3 = 'scara_e1_bar3' # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ SCARA_MOTOR1, SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" # URDF_PATH = "/home/erle/ros_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf" #URDF_PATH = "/media/raid/RL/catkin_ws/src/scara_e1/scara_e1_description/urdf/scara_e1_3joints.urdf" rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) ## # self._sub_normals = rospy.Subscriber('/gazebo_normals', Vector3, self.normals_callback, queue_size=1, buff_size=2**24) ## #self._sub_coll = rospy.Subscriber('/gazebo_contacts',Bool, collision_callback) ## # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. # print("self.environment['link_names'][0]:", self.environment['link_names'][0]) # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1]) self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] print("nr of joints: ", self.scara_chain.getNrOfJoints()) # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment self._seed() # self.max_steps_episode = 1000 def init_time(self, slowness=1, slowness_unit='sec', reset_jnts=False): self.slowness = slowness self.slowness_unit = slowness_unit print("slowness: ", self.slowness) print("slowness_unit: ", self.slowness_unit, "type of variable: ", type(slowness_unit)) def randomizeObstacle(self): print("calling randomize obstacle") #EE_POS_TGT_1 = np.asmatrix([0.3239543, 0.0083323, 0.3746]) #center of R #EE_POS_TGT_1 = np.asmatrix([0.29557, -0.0422738, 0.3746]) # R top left EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S 0.26342 #EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S center #EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.26342]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ #self.realgoal = target1 if np.random.uniform() < 0.5 else target2 if np.random.uniform() < 0.5: self.addObstacle() #self.realgoal = target2 self.realgoal = target1 print("\n\nOBSTACLE\n\n") else: # self.removeObstacle() self.addObstacle() self.realgoal = target1 print("\n\nNO OBSTACLE\n\n") print("randomizeObstacle realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H #def randomizeCorrect(self): def randomizeTarget(self): print("calling randomize correct") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal) #self.realgoal = #ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T)#np.array([self.np_random.choice([0, 1, 2, 3])]) # 0 = obstacle. 1 = no obstacle. # self.realgoal = 0 # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H def collision_callback(self, message): """ Callback method for the subscriber of Collision data """ # print("\ncollision: ", self._collision_msg) self._collision_msg = message def normals_callback(self, message): """ Callback method for the subscriber of Collision data """ self._normals_msg = message def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. if (self.slowness_unit == 'sec') or (self.slowness_unit is None): target.time_from_start.secs = self.slowness elif (self.slowness_unit == 'nsec'): target.time_from_start.nsecs = self.slowness else: print("Unrecognized unit. Please use sec or nsec.") # target.time_from_start.nsecs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for self.scara_chain.getNrOfJoints() joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(self.scara_chain.getNrOfJoints()) # Initialize a joint array for the present self.scara_chain.getNrOfJoints() joint angles. angles = JntArray(self.scara_chain.getNrOfJoints()) # Construct the joint array from the most recent joint angles. for i in range(self.scara_chain.getNrOfJoints()): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, self.scara_chain.getNrOfJoints()) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] # print("current_ee_tgt: ", current_ee_tgt) # print("ee_points:", ee_points) ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - reward - done (status) - action - observation - dictionary (#TODO clarify) """ self.iterator += 1 # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # # # Calculate if the env has been solved # done = bool(abs(self.reward_dist) < 0.005) # reward as in the enviroment for 4DOF commented out self.reward_dist = -self.rmse_func( self.ob[self.scara_chain.getNrOfJoints(): (self.scara_chain.getNrOfJoints() + 3)]) # here we want to fetch the positions of the end-effector which are nr_dof:nr_dof+3 if (self.rmse_func(self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) < 0.005): self.reward = 1 - self.rmse_func( self.ob[self.scara_chain.getNrOfJoints():( self.scara_chain.getNrOfJoints() + 3)]) # Make the reward increase as the distance decreases print("Reward is: ", self.reward) else: self.reward = self.reward_dist # print("\n STEP -> collision_msg", self._collision_msg) # print("\n STEP -> collision_msg.data", self._collision_msg.data) # If there is a collision, penalize reward considerably # if self._collision_msg.data == True: # self.reward = self.reward - 1000 # # print("\n\n\n Penalized reward after COLLISION", self._collision_msg) # print("string is empty") # print("reward: ", self.reward) # print("rmse_func: ", self.rmse_func(ee_points)) # # enviroment V2 reward # self.reward_dist = self.rmse_func(self.ob[3:6]) # # print("reward_dist :", self.reward_dist) # self.reward = 1 - self.reward_dist # Make the reward increase as the distance decreases # Calculate if the env has been solved done = (bool(abs(self.reward_dist) < 0.005)) or ( self.iterator > self.max_episode_steps) # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # # Unpause simulation # rospy.wait_for_service('/gazebo/unpause_physics') # try: # self.unpause() # except (rospy.ServiceException) as e: # print ("/gazebo/unpause_physics service call failed") # # Execute "action" # if self._collision_msg.collision2_name == '' or self._collision_msg.collision1_name == '': # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # else: # # self._pub.publish(self.get_trajectory_message(action[:self.scara_chain.getNrOfJoints()])) # # self._pub.publish(self.get_trajectory_message(self.environment['reset_conditions']['initial_positions'])) # self.reward = self.reward - 5 # print("\n\n\n Penalized reward after COLLISION", self._collision_msg) # time.sleep(1) self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) #REWARD SHAPING sophisticated penalization based on https://arxiv.org/pdf/1609.07845.pdf if self._collision_msg.collision2_name == '' or self._collision_msg.collision1_name == '': self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) else: #check if depth is existing if self._collision_msg.depths is None: print("self._collision_msg.depths[0] is empty") # print("Contact detected.", self._collision_msg.collision1_name, ", ", self._collision_msg.collision2_name) else: contact_depths = 1000 * self._collision_msg.depths[ 0] #make them in mm otherwise we have too many decimals print("\ncontact_depths", contact_depths) self._pub.publish( self.get_trajectory_message( action[:self.scara_chain.getNrOfJoints()])) #we always assume that depths is positive here. Sometimes depths is negative (actually most of the time). # changing to abs value of depths if contact_depths > 0 and abs(contact_depths) < 0.0001: self.reward = self.reward - (abs(contact_depths)) / 2 elif contact_depths > 0 and abs(contact_depths) > 0.0001: self.reward = self.reward - abs(contact_depths) else: print("self._collision_msg.depths[0]:", contact_depths) #TODO: wait until action gets executed # When adding this the algorithm does not converge # time.sleep(int(self.environment['slowness'])) # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds # # Pause simulation # rospy.wait_for_service('/gazebo/pause_physics') # try: # #resp_pause = pause.call() # self.pause() # except (rospy.ServiceException) as e: # print ("/gazebo/pause_physics service call failed") # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # print("in step, action: ", action[:3]) # print("in step, observation: ", self.ob[:3]) # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def addObstacle(self): rospy.wait_for_service('/gazebo/spawn_urdf_model') # <material name=\"green\">\ # <color rgba=\"0 0.8 .8 1\"/>\ # </material>\ try: # #NORMAL CYLINDER model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <box size=\".1 .05 1\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"blue\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\ </inertial>\ <collision>\ <geometry>\ <box size=\".2 .05 1\"/>\ </geometry>\ <surface>\ <contact>\ <ode>\ <min_depth>\"0.001\"</min_depth>\ <kp>\"1e6\"</kp>\ <kd>\"1.000000\"</kd>\ </ode>\ </contact>\ </surface>\ </collision>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0.5\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/Blue</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() number = random.randint(0, 4) #pose.position.x = 0.15;# pose.position.x = 0.3 # pose.position.y = 0.07 # pose.position.z = 0.05 pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 #BOX # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"box0\">\ # <visual>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"blue\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia>\ # <ixx>1.0</ixx>\ # <ixy>0</ixy>\ # <ixz>0</ixz>\ # <iyy>1.0</iyy>\ # <iyz>0</iyz>\ # <izz>1.0</izz>\ # </inertia>\ # </inertial>\ # <collision>\ # <geometry>\ # <box>\ # <size>0.2 0.2 1.0</size>\ # </box>\ # </geometry>\ # </collision>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0.7\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"box0\"/>\ # </joint>\ # <gazebo reference=\"box0\">\ # <material>Gazebo/Blue</material>\ # </gazebo>\ # </robot>" # model_xml = "<?xml version=\"1.0\"?> \ # <robot name=\"myfirst\"> \ # <link name=\"world\"> \ # </link>\ # <link name=\"cylinder0\">\ # <visual>\ # <geometry>\ # <sphere radius=\"0.01\"/>\ # </geometry>\ # <origin xyz=\"0 0 0\"/>\ # <material name=\"rojotransparente\">\ # <ambient>0.5 0.5 1.0 0.1</ambient>\ # <diffuse>0.5 0.5 1.0 0.1</diffuse>\ # </material>\ # </visual>\ # <inertial>\ # <mass value=\"5.0\"/>\ # <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ # </inertial>\ # </link>\ # <joint name=\"world_to_base\" type=\"fixed\"> \ # <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ # <parent link=\"world\"/>\ # <child link=\"cylinder0\"/>\ # </joint>\ # <gazebo reference=\"cylinder0\">\ # <material>Gazebo/GreenTransparent</material>\ # </gazebo>\ # </robot>" # robot_namespace = "" # pose = Pose() # number = random.randint(0, 4) # # if number == 1: # # pose.position.x = 0.25; # # pose.position.y = 0.07; # # if number == 2: # # pose.position.x = 0.25; # # pose.position.y = -0.07; # # if number == 3: # # pose.position.x = 0.23; # # pose.position.y = -0.087; # # if number == 4: # # pose.position.x = 0.23; # # pose.position.y = 0.087; # # pose.position.x = 0.25;# # pose.position.y = 0.07;# # # pose.position.z = 0.0; # pose.orientation.x = 0; # pose.orientation.y= 0; # pose.orientation.z = 0; # pose.orientation.w = 0; reference_frame = "" if number > -1: self.add_model(model_name="obstacle", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame="") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def removeObstacle(self): rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(model_name="obstacle") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def _reset(self): # """ # Reset the agent for a particular experiment condition. # """ self.iterator = 0 msg = self.get_trajectory_message( self.environment['reset_conditions']['initial_positions']) msg.points[0].time_from_start.secs = 0 msg.points[0].time_from_start.nsecs = 10000 self._pub.publish(msg) # time.sleep(int(self.environment['slowness'])) # using seconds # time.sleep(int(self.environment['slowness'])/1000000000) # using nanoseconds if (self.slowness_unit == 'sec') or (self.slowness_unit is None): time.sleep(int(self.slowness)) elif (self.slowness_unit == 'nsec'): time.sleep(int(self.slowness / 1000000000)) # using nanoseconds else: print("Unrecognized unit. Please use sec or nsec.") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Return the corresponding observation return self.ob
def __init__(self): """ Initialize the MARA environemnt """ # Manage command line args args = ut_generic.getArgsParserMARA().parse_args() self.real_speed = args.real_speed self.velocity = args.velocity # Set the path of the corresponding URDF file URDF_PATH = get_prefix_path( "mara_description" ) + "/share/mara_description/urdf/mara_robot_gripper_140.urdf" # Create the node after the new ROS_DOMAIN_ID is set in generate_launch_description() rclpy.init(args=None) self.node = rclpy.create_node(self.__class__.__name__) # class variables self._observation_msg = None self.obs = None self.action_space = None self.target_position = None self.target_orientation = None self.max_episode_steps = 1024 self.iterator = 0 self.reset_jnts = True ############################# # Environment hyperparams ############################# # Target, where should the agent reach self.target_position = np.asarray([-0.40028, 0.095615, 0.72466]) # close to the table self.target_orientation = np.asarray( [0., 0.7071068, 0.7071068, 0.]) # arrow looking down [w, x, y, z] # self.target_position = np.asarray([-0.386752, -0.000756, 1.40557]) # easy point # self.target_orientation = np.asarray([-0.4958324, 0.5041332, 0.5041331, -0.4958324]) # arrow looking opposite to MARA [w, x, y, z] EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' EE_LINK = 'ee_link' # Set constants for links WORLD = 'world' TABLE = 'table' BASE = 'base_link' BASE_ROBOT = 'base_robot' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ WORLD, TABLE, BASE, BASE_ROBOT, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) # Initialize target end effector position self.environment = { 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'end_effector_points': EE_POINTS, } # Subscribe to the appropriate topics, taking into account the particular robot self._pub = self.node.create_publisher( JointTrajectory, JOINT_PUBLISHER, qos_profile=qos_profile_sensor_data) self._sub = self.node.create_subscription( JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) self.reset_sim = self.node.create_client(Empty, '/reset_simulation') # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = tree_urdf.treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.mara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) self.num_joints = self.mara_chain.getNrOfJoints() # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.mara_chain) self.obs_dim = self.num_joints + 10 # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] low = -np.pi * np.ones(self.num_joints) high = np.pi * np.ones(self.num_joints) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Seed the environment self.seed()
def __init__(self): # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._camera_image = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0, 0, 0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 2 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image, self.camera_callback) #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # Seed the environment self._seed() model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="")
class Box3DOFv1Env(gazebo_env.GazeboEnv): def __init__(self): # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._camera_image = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0, 0, 0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 2 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image, self.camera_callback) #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # Seed the environment self._seed() model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") def observation_callback(self, message): """ Callback method for the subscriber of JointTrajectoryControllerState """ self._observation_msg = message # print("!!!!!!!!!Received an observation") def camera_callback(self, message): """ Callback method for the subscriber of Image """ self._camera_image = message print("\ncamera_image.height", self._camera_image.height) print("\ncamera_image.width", self._camera_image.width) def get_trajectory_message(self, action, robot_id=0): """ Helper function. Wraps an action vector of joint angles into a JointTrajectory message. The velocities, accelerations, and effort do not control the arm motion """ # Set up a trajectory message to publish. action_msg = JointTrajectory() action_msg.joint_names = self.environment['joint_order'] # Create a point to tell the robot to move to. target = JointTrajectoryPoint() action_float = [float(i) for i in action] target.positions = action_float # These times determine the speed at which the robot moves: # it tries to reach the specified target position in 'slowness' time. target.time_from_start.secs = self.environment['slowness'] # Package the single point into a trajectory of points with length 1. action_msg.points = [target] return action_msg def process_observations(self, message, agent, robot_id=0): """ Helper fuinction to convert a ROS message to joint angles and velocities. Check for and handle the case where a message is either malformed or contains joint values in an order different from that expected observation_callback in hyperparams['joint_order'] """ if not message: print("Message is empty") # return None else: # # Check if joint values are in the expected order and size. if message.joint_names != agent['joint_order']: # Check that the message is of same size as the expected message. if len(message.joint_names) != len(agent['joint_order']): raise MSG_INVALID_JOINT_NAMES_DIFFER # Check that all the expected joint values are present in a message. if not all( map(lambda x, y: x in y, message.joint_names, [ self._valid_joint_set[robot_id] for _ in range(len(message.joint_names)) ])): raise MSG_INVALID_JOINT_NAMES_DIFFER print("Joints differ") return np.array( message.actual.positions) # + message.actual.velocities def get_jacobians(self, state, robot_id=0): """ Produce a Jacobian from the urdf that maps from joint angles to x, y, z. This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles. The angles are roll, pitch, and yaw (not Euler angles) and are not needed. Returns a repackaged Jacobian that is 3x6. """ # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles jacobian = Jacobian(3) # Initialize a joint array for the present 6 joint angles. angles = JntArray(3) # Construct the joint array from the most recent joint angles. for i in range(3): angles[i] = state[i] # Update the jacobian by solving for the given angles.observation_callback self.jac_solver.JntToJac(angles, jacobian) # Initialize a numpy array to store the Jacobian. J = np.array([[jacobian[i, j] for j in range(jacobian.columns())] for i in range(jacobian.rows())]) # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles ee_jacobians = J return ee_jacobians def get_ee_points_jacobians(self, ref_jacobian, ee_points, ref_rot): """ Get the jacobians of the points on a link given the jacobian for that link's origin :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :return: 3N x 6 Jac_trans, each 3 x 6 numpy array is the Jacobian[:3, :] for that point 3N x 6 Jac_rot, each 3 x 6 numpy array is the Jacobian[3:, :] for that point """ ee_points = np.asarray(ee_points) ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] end_effector_points_rot = np.expand_dims(ref_rot.dot(ee_points.T).T, axis=1) ee_points_jac_trans = np.tile(ref_jacobians_trans, (ee_points.shape[0], 1)) + \ np.cross(ref_jacobians_rot.T, end_effector_points_rot).transpose( (0, 2, 1)).reshape(-1, 3) ee_points_jac_rot = np.tile(ref_jacobians_rot, (ee_points.shape[0], 1)) return ee_points_jac_trans, ee_points_jac_rot def get_ee_points_velocities(self, ref_jacobian, ee_points, ref_rot, joint_velocities): """ Get the velocities of the points on a link :param ref_jacobian: 6 x 6 numpy array, jacobian for the link's origin :param ee_points: N x 3 numpy array, points' coordinates on the link's coordinate system :param ref_rot: 3 x 3 numpy array, rotational matrix for the link's coordinate system :param joint_velocities: 1 x 6 numpy array, joint velocities :return: 3N numpy array, velocities of each point """ ref_jacobians_trans = ref_jacobian[:3, :] ref_jacobians_rot = ref_jacobian[3:, :] ee_velocities_trans = np.dot(ref_jacobians_trans, joint_velocities) ee_velocities_rot = np.dot(ref_jacobians_rot, joint_velocities) ee_velocities = ee_velocities_trans + np.cross( ee_velocities_rot.reshape(1, 3), ref_rot.dot(ee_points.T).T) return ee_velocities.reshape(-1) def addObstacle(self): print("\nADD OBSTACLE") rospy.wait_for_service('/gazebo/spawn_urdf_model') try: # #NORMAL BOX model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <box size=\".05 .05 .05\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"red\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"10.\"/>\ <inertia ixx=\"1\" ixy=\".0\" ixz=\"0.0\" iyy=\"1\" iyz=\"0.0\" izz=\"1\"/>\ </inertial>\ <collision>\ <geometry>\ <box size=\".05 .05 .05\"/>\ </geometry>\ </collision>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0.3\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/Red</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() number = random.randint(0, 4) #pose.position.x = 0.15;# pose.position.x = 0.3 # pose.position.y = 0.07 # pose.position.z = 0.05 pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" self.add_model(model_name="obstacle", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame="") except (rospy.ServiceException) as e: print("/gazebo/spawn_urdf_model service call failed") def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False # # ROS 2, Acquire the lock to prevent the subscriber thread from # # updating times or observation messages. # self._time_lock.acquire(True) obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:3], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] def rmse_func(self, ee_points): """ Computes the Residual Mean Square Error of the difference between current and desired end-effector position """ rmse = np.sqrt(np.mean(np.square(ee_points), dtype=np.float32)) return rmse def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): """ Implement the environment step abstraction. Execute action and returns: - observation - reward - done (status) - dictionary (#TODO clarify) """ # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Execute "action" # if rclpy.ok(): # ROS 2 code self._pub.publish(self.get_trajectory_message(action[:3])) # # Take an observation # TODO: program this better, check that ob is not None, etc. self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Pause simulation rospy.wait_for_service('/gazebo/pause_physics') try: #resp_pause = pause.call() self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Take an observation # TODO: program this better, check that ob is not None, etc. # self.ob = take_observation() self.reward_dist = -self.rmse_func(self.ob[2]) if abs(self.reward_dist) < 0.005: # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist) self.reward = 100 + self.reward_dist else: # print("reward (Eucledian dist (mm)): ", -1000 * self.reward_dist) self.reward = self.reward_dist # print("reward: ", self.reward) # Calculate if the env has been solved done = bool(abs(self.reward_dist) < 0.005) # self._time_lock.release() # ROS 2 code # Return the corresponding observations, rewards, etc. # TODO, understand better what's the last object to return return self.ob, self.reward, done, {} def _reset(self): """ Reset the agent for a particular experiment condition. """ # Resets the state of the environment and returns an initial observation. rospy.wait_for_service('/gazebo/reset_simulation') try: #reset_proxy.call() self.reset_proxy() except (rospy.ServiceException) as e: print("/gazebo/reset_simulation service call failed") # Unpause simulation rospy.wait_for_service('/gazebo/unpause_physics') try: self.unpause() except (rospy.ServiceException) as e: print("/gazebo/unpause_physics service call failed") # Take an observation self.ob = self.take_observation() while (self.ob is None): self.ob = self.take_observation() # Pause the simulation rospy.wait_for_service('/gazebo/pause_physics') try: self.pause() except (rospy.ServiceException) as e: print("/gazebo/pause_physics service call failed") # Return the corresponding observation return self.ob