def randomizeTarget(self): print("calling randomize target") EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.2868]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.2868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal)
def randomizeTargetPositions(self): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset EE_POS_TGT_RANDOM2 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 ee_pos_tgt_random2 = EE_POS_TGT_RANDOM2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random2, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeTarget realgoal: ", self.realgoal)
def randomizeTargetPose(self, obj_name, centerPoint=False): ms = ModelState() if not centerPoint: EE_POS_TGT = np.asmatrix([ round(np.random.uniform(-0.62713, -0.29082), 5), round(np.random.uniform(-0.15654, 0.15925), 5), self.realgoal[2] ]) roll = 0.0 pitch = 0.0 yaw = np.random.uniform(-1.57, 1.57) #tf.taitbryan.euler2quat(z, y, x) --> [w, x, y, z] q = tf.taitbryan.euler2quat(yaw, pitch, roll) EE_ROT_TGT = tf.quaternions.quat2mat(q) self.target_orientation = EE_ROT_TGT ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, EE_ROT_TGT).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = q[1] ms.pose.orientation.y = q[2] ms.pose.orientation.z = q[3] ms.pose.orientation.w = q[0] if obj_name != "target": ms.model_name = obj_name rospy.wait_for_service('gazebo/set_model_state') try: self.set_model_pose(ms) except (rospy.ServiceException) as e: print("Error setting the pose of " + obj_name) self.spawnModel(obj_name, self.obj_path, ms.pose) else: EE_POS_TGT = np.asmatrix( [self.realgoal[0], self.realgoal[1], centerPoint]) ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, self.target_orientation).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = 0 ms.pose.orientation.y = 0 ms.pose.orientation.z = 0 ms.pose.orientation.w = 0 self._pub_link_state.publish( LinkState(link_name="target_link", pose=ms.pose, reference_frame="world")) self.realgoal = ee_tgt
def randomizeTargetPose(self, obj_name, centerPoint=None): ms = ModelState() if centerPoint is None: EE_POS_TGT = np.asmatrix([ round(np.random.uniform(-0.62713, -0.29082), 5), round(np.random.uniform(-0.15654, 0.15925), 5), self.realgoal[2] ]) roll = 0.0 pitch = 0.0 yaw = np.random.uniform(-1.57, 1.57) q = quat.from_euler_angles(roll, pitch, yaw) EE_ROT_TGT = rot_matrix = quat.as_rotation_matrix(q) self.target_orientation = EE_ROT_TGT ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, EE_ROT_TGT).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = q.x ms.pose.orientation.y = q.y ms.pose.orientation.z = q.z ms.pose.orientation.w = q.w ms.model_name = obj_name rospy.wait_for_service('gazebo/set_model_state') try: self.set_model_pose(ms) except (rospy.ServiceException) as e: print("Error setting the pose of " + obj_name) else: EE_POS_TGT = np.asmatrix( [self.realgoal[0], self.realgoal[1], centerPoint]) ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, self.target_orientation).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = 0 ms.pose.orientation.y = 0 ms.pose.orientation.z = 0 ms.pose.orientation.w = 0 self._pub_link_state.publish( LinkState(link_name="target_link", pose=ms.pose, reference_frame="world")) self.realgoal = ee_tgt
def setTargetPositions(self, msg): """ The goal is to test with randomized positions which range between the boundries of the H-ROS logo """ print("In randomize target positions.") EE_POS_TGT_RANDOM1 = np.asmatrix([ np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.2868 ]) # boundry box of the first half H-ROS letters with +-0.01 offset # EE_POS_TGT_RANDOM1 = np.asmatrix([np.random.uniform(0.2852485, 0.3883636), np.random.uniform(-0.1746508, 0.1701576), 0.3746]) # boundry box of whole box H-ROS letters with +-0.01 offset # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_random1 = EE_POS_TGT_RANDOM1 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 print("randomizeTarget realgoal: ", self.realgoal)
def randomizeTargetPose(self, obj_name): EE_POS_TGT = np.asmatrix([[round(np.random.uniform(-0.62713, -0.29082), 5), round(np.random.uniform(-0.15654, 0.15925), 5), 0.72466]]) roll = 0.0 pitch = 0.0 yaw = np.random.uniform(-1.57, 1.57) q = quat.from_euler_angles(roll, pitch, yaw) EE_ROT_TGT = rot_matrix = quat.as_rotation_matrix(q) self.target_orientation = EE_ROT_TGT EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, EE_POS_TGT, EE_ROT_TGT).T) self.realgoal = ee_tgt ms = ModelState() ms.pose.position.x = EE_POS_TGT[0,0] ms.pose.position.y = EE_POS_TGT[0,1] ms.pose.position.z = EE_POS_TGT[0,2] ms.pose.orientation.x = q.x ms.pose.orientation.y = q.y ms.pose.orientation.z = q.z ms.pose.orientation.w = q.w self._pub_link_state.publish( LinkState(link_name="target_link", pose=ms.pose, reference_frame="world") ) ms.model_name = obj_name rospy.wait_for_service('gazebo/set_model_state') try: self.set_model_pose(ms) except (rospy.ServiceException) as e: print("Error setting the pose of " + obj_name)
def randomizeObstacle(self): print("calling randomize obstacle") #EE_POS_TGT_1 = np.asmatrix([0.3239543, 0.0083323, 0.3746]) #center of R #EE_POS_TGT_1 = np.asmatrix([0.29557, -0.0422738, 0.3746]) # R top left EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.26342]) #center of S 0.26342 #EE_POS_TGT_1 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S center #EE_POS_TGT_1 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.1326121, 0.26342]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ #self.realgoal = target1 if np.random.uniform() < 0.5 else target2 if np.random.uniform() < 0.5: self.addObstacle() #self.realgoal = target2 self.realgoal = target1 print("\n\nOBSTACLE\n\n") else: # self.removeObstacle() self.addObstacle() self.realgoal = target1 print("\n\nNO OBSTACLE\n\n") print("randomizeObstacle realgoal: ", self.realgoal)
def setTargetPose(self, pos, rot): EE_POS_TGT = pos EE_ROT_TGT = rot EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt
def tgt_callback(self,msg): # print("Whats the target?: ", msg) if self.detect_target_once is 1: print("Get the target from vision, for now just use position.") EE_POS_TGT = np.asmatrix([msg.position.x, msg.position.y, msg.position.z]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt = EE_POS_TGT # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 print("Predicted target is: ", self.realgoal) self.detect_target_once = 0
def randomizeMultipleTargets(self): print("calling randomize multiple targets") EE_POS_TGT_1 = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_POS_TGT_2 = np.asmatrix([0.3305805, -0.0985179, 0.3746]) # center of H right EE_POS_TGT_3 = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT_4 = np.asmatrix([0.3355224, 0.0344309, 0.3746]) # center of O left EE_POS_TGT_5 = np.asmatrix([0.3013209, 0.1647450, 0.3746]) # S top right EE_POS_TGT_6 = np.asmatrix([0.3349774, 0.1570571, 0.3746]) # S midlle EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt_1 = EE_POS_TGT_1 ee_pos_tgt_2 = EE_POS_TGT_2 ee_pos_tgt_3 = EE_POS_TGT_3 ee_pos_tgt_4 = EE_POS_TGT_4 ee_pos_tgt_5 = EE_POS_TGT_5 ee_pos_tgt_6 = EE_POS_TGT_6 # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position # ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_1, ee_rot_tgt).T) target2 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_2, ee_rot_tgt).T) target3 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_3, ee_rot_tgt).T) target4 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_4, ee_rot_tgt).T) target5 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_5, ee_rot_tgt).T) target6 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt_6, ee_rot_tgt).T) """ This is for initial test only, we need to change this in the future to be more realistic. E.g. covered target -> go to other target. This could be implemented for example with vision. """ all_targets = [target1, target2, target3, target4, target5, target6] self.realgoal = random.choice(all_targets) # self.realgoal = target1 if np.random.uniform() < 0.5 else target2 print("randomizeCorrect realgoal: ", self.realgoal)
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MAIRATop3DOF_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.2, 0.2, 0.2868]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0., 0., 0, 0]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/maira_controller/command' JOINT_SUBSCRIBER = '/maira_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links BASE = 'maira_base_link' MAIRA_MOTOR1_LINK = 'motor1_link' MAIRA_MOTOR2_LINK = 'motor2_link' MAIRA_MOTOR3_LINK = 'motor3_link' MAIRA_MOTOR4_LINK = 'motor4_link' MAIRA_MOTOR5_LINK = 'motor5_link' MAIRA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT] LINK_NAMES = [BASE, MAIRA_MOTOR1_LINK, MAIRA_MOTOR2_LINK, MAIRA_MOTOR3_LINK, MAIRA_MOTOR4_LINK, MAIRA_MOTOR5_LINK, MAIRA_MOTOR6_LINK, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path("maira_description") + "/urdf/maira_demo_camera_side.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Seed the environment # Seed the environment self._seed()
def init_3dof_robot(self): print("I am in enviroment 3DOF") self._observation_msg = None print(self._observation_msg) self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 self._time_lock = threading.RLock() self.choose_robot = 0 self.environment = None ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': self.urdf_path, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Topics for the robot publisher and subscriber. # self.enviroment = None JOINT_PUBLISHER = '/scara_controller_3dof/command' JOINT_SUBSCRIBER = '/scara_controller_3dof/state' # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub_3dof = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub_3dof = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback)
def init_4dof_robot(self): print("I am in enviroment 4DOF") self._observation_msg_4dof = None print(self._observation_msg_4dof) self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 self.reset_jnts = False self._time_lock = threading.RLock() self.choose_robot = 1 self.environment = None ############################# # Environment hyperparams ############################# ######################################################################################## ''' Add here the stuff needed for the 4DOF robot ''' EE_POS_TGT_4DOF = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT_4DOF = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS_4DOF = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES_4DOF = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS_4DOF = np.array([0., 0., 0., 0.]) # JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command' # JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state' # joint names: MOTOR1_4DOF_JOINT = 'motor1_4dof' MOTOR2_4DOF_JOINT = 'motor2_4dof' MOTOR3_4DOF_JOINT = 'motor3_4dof' MOTOR4_4DOF_JOINT = 'motor4_4dof' # Set constants for links BASE_4DOF = '4_dof_scara_e1_base_link' BASE_4DOF_MOTOR = '4_dof_scara_e1_base_motor' SCARA_4DOF_MOTOR1 = '4_dof_scara_e1_motor1' SCARA_4DOF_INSIDE_MOTOR1 = '4_dof_scara_e1_motor1_inside' SCARA_4DOF_SUPPORT_MOTOR1 = '4_dof_scara_e1_motor1_support' SCARA_4DOF_BAR_MOTOR1 = '4_dof_scara_e1_bar1' SCARA_4DOF_FIXBAR_MOTOR1 = '4_dof_scara_e1_fixbar1' SCARA_4DOF_MOTOR2 = '4_dof_scara_e1_motor2' SCARA_4DOF_INSIDE_MOTOR2 = '4_dof_scara_e1_motor2_inside' SCARA_4DOF_SUPPORT_MOTOR2 = '4_dof_scara_e1_motor2_support' SCARA_4DOF_BAR_MOTOR2 = '4_dof_scara_e1_bar2' SCARA_4DOF_FIXBAR_MOTOR2 = '4_dof_scara_e1_fixbar2' SCARA_4DOF_MOTOR3 = '4_dof_scara_e1_motor3' SCARA_4DOF_INSIDE_MOTOR3 = '4_dof_scara_e1_motor3_inside' SCARA_4DOF_SUPPORT_MOTOR3 = '4_dof_scara_e1_motor3_support' SCARA_4DOF_BAR_MOTOR3 = '4_dof_scara_e1_bar3' SCARA_4DOF_FIXBAR_MOTOR3 = '4_dof_scara_e1_fixbar3' SCARA_4DOF_MOTOR4 = '4_dof_scara_e1_motor4' SCARA_4DOF_INSIDE_MOTOR4 = '4_dof_scara_e1_motor4_inside' SCARA_4DOF_SUPPORT_MOTOR4 = '4_dof_scara_e1_motor4_support' SCARA_4DOF_BAR_MOTOR4 = '4_dof_scara_e1_bar4' SCARA_4DOF_FIXBAR_MOTOR4 = '4_dof_scara_e1_fixbar4' SCARA_4DOF_RANGEFINDER = '4_dof_scara_e1_rangefinder' EE_LINK_4DOF = '4_dof_ee_link' JOINT_ORDER_4DOF = [ MOTOR1_4DOF_JOINT, MOTOR2_4DOF_JOINT, MOTOR3_4DOF_JOINT, MOTOR4_4DOF_JOINT ] LINK_NAMES_4DOF = [ BASE_4DOF, BASE_4DOF_MOTOR, SCARA_4DOF_MOTOR1, SCARA_4DOF_INSIDE_MOTOR1, SCARA_4DOF_SUPPORT_MOTOR1, SCARA_4DOF_BAR_MOTOR1, SCARA_4DOF_FIXBAR_MOTOR1, SCARA_4DOF_MOTOR2, SCARA_4DOF_INSIDE_MOTOR2, SCARA_4DOF_SUPPORT_MOTOR2, SCARA_4DOF_BAR_MOTOR2, SCARA_4DOF_FIXBAR_MOTOR2, SCARA_4DOF_MOTOR3, SCARA_4DOF_INSIDE_MOTOR3, SCARA_4DOF_SUPPORT_MOTOR3, SCARA_4DOF_BAR_MOTOR3, SCARA_4DOF_FIXBAR_MOTOR3, SCARA_4DOF_MOTOR4, SCARA_4DOF_INSIDE_MOTOR4, SCARA_4DOF_SUPPORT_MOTOR4, EE_LINK_4DOF ] reset_condition = { 'initial_positions': INITIAL_JOINTS_4DOF, 'initial_velocities': [] } ################################################################################################################################################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" m_joint_order = copy.deepcopy(JOINT_ORDER_4DOF) m_link_names = copy.deepcopy(LINK_NAMES_4DOF) ee_pos_tgt = EE_POS_TGT_4DOF ee_rot_tgt = EE_ROT_TGT_4DOF # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS_4DOF, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'ee_points_tgt': self.realgoal, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': self.urdf_path, # 'joint_publisher': m_joint_publishers, # 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS_4DOF, 'end_effector_velocities': EE_VELOCITIES_4DOF, } # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # Topics for the robot publisher and subscriber. # self.enviroment = None JOINT_PUBLISHER_4DOF = '/scara_controller_4dof/command' JOINT_SUBSCRIBER_4DOF = '/scara_controller_4dof/state' # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub_4dof = rospy.Publisher('/scara_controller_4dof/command', JointTrajectory) self._sub_4dof = rospy.Subscriber('/scara_controller_4dof/state', JointTrajectoryControllerState, self.observation_callback_4dof)
def randomizeTargetPose(self, obj_name, centerPoint=False): ms = ModelState() if not centerPoint: EE_POS_TGT = np.asmatrix([ round(np.random.uniform(-0.62713, -0.29082), 5), round(np.random.uniform(-0.15654, 0.15925), 5), self.realgoal[2] ]) roll = 0.0 pitch = 0.0 yaw = np.random.uniform(-1.57, 1.57) q = quat.from_euler_angles(roll, pitch, yaw) EE_ROT_TGT = rot_matrix = quat.as_rotation_matrix(q) self.target_orientation = EE_ROT_TGT ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, EE_ROT_TGT).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = q.x ms.pose.orientation.y = q.y ms.pose.orientation.z = q.z ms.pose.orientation.w = q.w if obj_name != "target": # ms.model_name = obj_name # rospy.wait_for_service('gazebo/set_model_state') # try: # self.set_model_pose(ms) # except (rospy.ServiceException) as e: # print("Error setting the pose of " + obj_name) rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model(obj_name) except rospy.ServiceException as e: print("Error removing model") self.spawnModel(obj_name, self.obj_path, ms.pose) else: EE_POS_TGT = np.asmatrix( [self.realgoal[0], self.realgoal[1], centerPoint]) ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], EE_POS_TGT, self.target_orientation).T) ms.pose.position.x = EE_POS_TGT[0, 0] ms.pose.position.y = EE_POS_TGT[0, 1] ms.pose.position.z = EE_POS_TGT[0, 2] ms.pose.orientation.x = 0 ms.pose.orientation.y = 0 ms.pose.orientation.z = 0 ms.pose.orientation.w = 0 # self._pub_link_state.publish( LinkState(link_name="target_link", pose=ms.pose, reference_frame="world") ) file_xml = open(self.assets_path + '/models/urdf/target_point.urdf', mode='r') model_sdf = file_xml.read() file_xml.close() rospy.wait_for_service('/gazebo/delete_model') try: self.remove_model("target") except rospy.ServiceException as e: print("Error removing model") rospy.wait_for_service('/gazebo/spawn_urdf_model') try: self.add_model_urdf(model_name="target", model_xml=model_sdf, robot_namespace="", initial_pose=ms.pose, reference_frame="world") except rospy.ServiceException as e: print('Error adding urdf model') self.realgoal = ee_tgt
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms, this should reflect the lstm step size, otherwqise it restarts two times self.rand_target_thresh = 40 self.iterator = 0 self.reset_iter = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._collision_msg = None self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466]) # alex2 # EE_POS_TGT = np.asmatrix([-0.173762, -0.0124312, 1.60415]) # for testing collision_callback # EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar # EE_ROT_TGT = np.asmatrix([[-0.00128296, 0.9999805 , 0.00611158], # [ 0.00231397, -0.0061086 , 0.99997867], # [ 0.9999965 , 0.00129708, -0.00230609]]) # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # original orientation EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' RAND_LIGHT_PUBLISHER = '/randomizers/randomizer/light' RAND_SKY_PUBLISHER = '/randomizers/randomizer/sky' RAND_PHYSICS_PUBLISHER = '/randomizers/randomizer/physics' LINK_STATE_PUBLISHER = '/gazebo/set_link_state' RAND_OBSTACLES_PUBLISHER = '/randomizers/randomizer/obstacles' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory, queue_size=1) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) # self._pub_rand_light = rospy.Publisher(RAND_LIGHT_PUBLISHER, stdEmpty, queue_size=1) # self._pub_rand_sky = rospy.Publisher(RAND_SKY_PUBLISHER, stdEmpty, queue_size=1) # self._pub_rand_physics = rospy.Publisher(RAND_PHYSICS_PUBLISHER, stdEmpty, queue_size=1) # self._pub_rand_obstacles = rospy.Publisher(RAND_OBSTACLES_PUBLISHER, stdEmpty, queue_size=1) self._pub_link_state = rospy.Publisher(LINK_STATE_PUBLISHER, LinkState, queue_size=1) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 10 #7 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.add_model_urdf = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.set_model_pose = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_model_state = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "world" self.assets_path = os.path.abspath( os.path.join(rospkg.RosPack().get_path("gazebo_domain_randomizer"), os.pardir)) + "/assets" file_xml = open(self.assets_path + '/models/urdf/target_point.urdf', mode='r') model_xml = file_xml.read() file_xml.close() rospy.wait_for_service('/gazebo/spawn_urdf_model') try: self.add_model_urdf(model_name="target", model_xml=model_xml, robot_namespace=robot_namespace, initial_pose=pose, reference_frame=reference_frame) except rospy.ServiceException as e: print('Error adding urdf model') # # self.obj_path = self.assets_path + '/models/sdf/rubik_cube_random.sdf' # # self.obj_path = self.assets_path + '/models/sdf/rubik_cube.sdf' # self.obj_path = self.assets_path + '/models/sdf/box.sdf' #0.067 0.067 0.067 # # self.obj_path = self.assets_path + '/models/sdf/cylinder.sdf' # radius = 0.03 length = 0.08 # # self.obj_path = self.assets_path + '/models/urdf/sphere.urdf' #radius = 0.033 # file_sdf = open(self.obj_path ,mode='r') # model_sdf = file_sdf.read() # file_sdf.close() # # rospy.wait_for_service('/gazebo/spawn_sdf_model') # # rospy.wait_for_service('/gazebo/spawn_urdf_model') # try: # # self.add_model_urdf(model_name="obj", # self.add_model_sdf(model_name="obj", # model_xml=model_sdf, # robot_namespace=robot_namespace, # initial_pose=pose, # reference_frame=reference_frame) # except rospy.ServiceException as e: # print('Error adding sdf model') self._seed()
def tgt_callback(self, msg): # print("Whats the target?: ", msg) # self.realgoal is None and self.target_orientation is None: if self.detect_target_once is 1: print("Get the target from vision, for now just use position.") EE_POS_TGT = np.asmatrix( [msg.position.x, msg.position.y, msg.position.z]) rot_quat = np.quaternion(msg.orientation.x, msg.orientation.y, msg.orientation.z, msg.orientation.w) print("rot_quat: ", rot_quat) rot_matrix = quat.as_rotation_matrix(rot_quat) print("rot_matrix: ", rot_matrix) EE_ROT_TGT = rot_matrix #np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) # rot_matrix # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) #rot_matrix# EE_POINTS = np.asmatrix([[0, 0, 0]]) ee_pos_tgt = EE_POS_TGT # leave rotation target same since in scara we do not have rotation of the end-effector ee_rot_tgt = EE_ROT_TGT target1 = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) # self.realgoal = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt_random1, ee_rot_tgt).T) self.realgoal = target1 self.target_orientation = ee_rot_tgt print("Predicted target is: ", self.realgoal) self.detect_target_once = 0 self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"sphere0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"sphere0\"/>\ </joint>\ <gazebo reference=\"sphere0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="")
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop3DOF_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.detect_target_once = 1 self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) EE_POS_TGT = np.asmatrix([ -0.53170885, -0.02076771, 0.74240961 ]) # 200 cm from the z axis some random target at the begining # EE_POS_TGT = np.asmatrix([pose_rubik_pred.position.x, pose_rubik_pred.position.y, pose_rubik_pred.position.z]) # print("EE_POS_TGT: ", EE_POS_TGT) # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # EE_POS_TGT = np.asmatrix([0.0, 0.0, 1.4]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[-0.99500678, 0.09835458, -0.01696725], [-0.09951792, -0.99061751, 0.09366505], [-0.00759566, 0.0948859, 0.99545918]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1.57, 0., 1.57, 0.]) # INITIAL_JOINTS = np.array([0., 0., 1.37, 0., 1.37, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # INITIAL_JOINTS = np.array([2.832116288509212e-05, -7.644633833070458e-05, -0.9999138952294953, -2.4499067147409903e-05, -1.5700625461089226, 1.4725492722966749e-05]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" # URDF_PATH = "/home/rkojcev/catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) print("ee_tgt: ", ee_tgt) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) TARGET_SUBSCRIBER = '/mara/target' self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose, self.tgt_callback) # Instantiate CvBridge # self.bridge = CvBridge() # self._sub_image = rospy.Subscriber("/mara/rgb/image_raw", ImageMsg, self._observation_image_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 10 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) # high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.pub_set_model = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=1) self.addTarget() # Seed the environment # Seed the environment self._seed()
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name real_env.RealEnv.__init__(self) # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self.detect_target_once = 1 self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([-0.48731417, -0.00258965, 0.82467501]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'hros_actuation_servomotor_000A3500014E2' MOTOR2_JOINT = 'hros_actuation_servomotor_000A3500014E' MOTOR3_JOINT = 'hros_actuation_servomotor_000A3500018E' MOTOR4_JOINT = 'hros_actuation_servomotor_000A3500018E2' MOTOR5_JOINT = 'hros_actuation_servomotor_000A3500016E' MOTOR6_JOINT = 'hros_actuation_servomotor_000A3500016E2' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT] LINK_NAMES = [TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = '/home/erle/ros_catkin_ws/src/mara/mara_description/urdf/mara_demo_camera_top.urdf' m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) TARGET_SUBSCRIBER = '/mara/target' self._sub_tgt = rospy.Subscriber(TARGET_SUBSCRIBER, Pose, self.tgt_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ self.obs_dim = self.scara_chain.getNrOfJoints() + 10 #6 hardcode it for now # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.add_model_sdf = rospy.ServiceProxy('/gazebo/spawn_sdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.pub_set_model = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=1) # Seed the environment # Seed the environment self._seed() self.kf = [] for i in range(6): # self.kf.append(KalmanFilter(initial_state_mean=0, # transition_matrices=[1], # observation_matrices=[1], # initial_state_covariance=1, # observation_covariance=5, # transition_covariance=0.001, # n_dim_obs=1, n_dim_state=1, # em_vars=['transition_covariance', 'observation_covariance', 'initial_state_covariance'])) # self.kf.append(KalmanFilter(initial_state_mean=0, # transition_matrices=[1], # observation_matrices=[1], # initial_state_covariance=1, # observation_covariance=1, # transition_covariance=0.1, # n_dim_obs=1, n_dim_state=1)) self.kf.append(RingBuffer(500))
def __init__(self, reward_type='sparse', distance_threshold=0.005, n_substeps=20, relative_control=False): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step self._max_episode_steps = self.max_episode_steps # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to 1ms, since ddpg is slow self.slowness = 1000000 self.slowness_unit = 'nsec' self.reset_jnts = True self.choose_robot = 0 self.reward_type = reward_type self.distance_threshold = distance_threshold self.n_substeps = n_substeps self.relative_control = relative_control ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H # EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.3746]) #center of S EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high, dtype=np.float32) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) # Seed the environment self._seed()
def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ] return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1), ]
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and ROS 2 interfaces, for now communicating throught a bridge. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara3_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0.0, 0.0, 0.0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 1000000 # 1 is real life simulation # slowness = 1 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" #URDF_PATH = "../assets/urdf/modular_scara/scara_e1_3joints.urdf" import rospkg r = rospkg.RosPack() path = r.get_path('scara_e1_description') URDF_PATH = path + "/urdf/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed()
def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ # Take an observation # done = False obs_message = self._observation_msg if obs_message is None: # print("last_observations is empty") return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # tgt_quartenion = quaternion_from_matrix(self.target_orientation) current_quaternion = quat.from_rotation_matrix(rotation_matrix) # print("current_quaternion: ", current_quaternion) tgt_quartenion = quat.from_rotation_matrix(self.target_orientation) # A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] # this is wrong!!!! Substraction should be replaced by: quat_error = current_quaternion * tgt_quartenion.conjugate() # quat_error = current_quaternion - tgt_quartenion quat_error = current_quaternion * tgt_quartenion.conjugate() # convert quat to np arrays quat_error = quat.as_float_array(quat_error) # RK: revisit this later, we only take one angle difference here! angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.)) # print("quat error: ", quat_error) # print("quat_error[..., 0]: ", quat_error[..., 0]) # print("quat_error: ",quat_error) # print("angle_diff: ", angle_diff) # print("self.realgoal: ", self.realgoal) # print("curr quat: ", current_quaternion) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(angle_diff, -1), np.reshape(ee_velocities, -1), ] # print("quat_error: ", quat_error) # print("ee_points:", ee_points) # print("angle_diff: ", angle_diff) return np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(angle_diff, -1), np.reshape(ee_velocities, -1), ]
def step(self, action): state = [] reward = [] done = False data_arm_state = None while data_arm_state is None: try: data_arm_state = rospy.wait_for_message('/ariac/arm/state', JointTrajectoryControllerState, timeout=5) except: pass data_arm_state2 = JointTrajectoryControllerState() data_arm_state2.joint_names = data_arm_state.joint_names[1:] data_arm_state2.desired.positions = data_arm_state.desired.positions[1:] data_arm_state2.desired.velocities = data_arm_state.desired.velocities[1:] data_arm_state2.desired.accelerations = data_arm_state.desired.accelerations[1:] data_arm_state2.actual.positions = data_arm_state.actual.positions[1:] data_arm_state2.actual.velocities = data_arm_state.actual.velocities[1:] data_arm_state2.actual.accelerations = data_arm_state.actual.accelerations[1:] data_arm_state2.error.positions = data_arm_state.error.positions[1:] data_arm_state2.error.velocities = data_arm_state.error.velocities[1:] data_arm_state2.error.accelerations = data_arm_state.error.accelerations[1:] # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(data_arm_state2, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: # print(self.environment['link_names'][-1]) trans, rot = forward_kinematics(self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) # # rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # # # current_quaternion = np.array([angle]+dir.tolist())# # I need this calculations for the new reward function, need to send them back to the run scara or calculate them here current_quaternion = quaternion_from_matrix(rotation_matrix) current_ee_tgt = np.ndarray.flatten(get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal#self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians(ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities(ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(ee_velocities, -1),] self._pub.publish(self.get_trajectory_message([0.018231524968342513, 3.2196073949389703 + np.random.rand(1)[0] - .5, -0.6542426695478509 + np.random.rand(1)[0] - .5, 1.7401498071871018 + np.random.rand(1)[0] - .5, 3.7354939354077596 + np.random.rand(1)[0] - .5, -1.510707301072792 + np.random.rand(1)[0] - .5, 0.00014565660628829136 + np.random.rand(1)[0] - .5])) data_vacuum_state = None while data_vacuum_state is None: try: data_vacuum_state = rospy.wait_for_message('/ariac/gripper/state', VacuumGripperState, timeout=5) except: pass data_camera = None while data_camera is None: try: data_camera = rospy.wait_for_message('/ariac/logical_camera_1', LogicalCameraImage, timeout=5) except: pass try: self.vacuum_gripper_control(enable=bool(state[0])) except (rospy.ServiceException) as e: print ("/gazebo/reset_simulation service call failed") state = [data_arm_state, data_camera, data_vacuum_state] return state, reward, done, {}
def take_observation(self): """ Take observation from the environment and return it. TODO: define return type """ obs_message = self._observation_msg if obs_message is None: return None # Collect the end effector points and velocities in # cartesian coordinates for the process_observationsstate. # Collect the present joint angles and velocities from ROS for the state. last_observations = self.process_observations(obs_message, self.environment) # # # Get Jacobians from present joint angles and KDL trees # # # The Jacobians consist of a 6x6 matrix getting its from from # # # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw]) ee_link_jacobians = self.get_jacobians(last_observations) if self.environment['link_names'][-1] is None: print("End link is empty!!") return None else: trans, rot = forward_kinematics( self.scara_chain, self.environment['link_names'], last_observations[:self.scara_chain.getNrOfJoints()], base_link=self.environment['link_names'][0], end_link=self.environment['link_names'][-1]) rotation_matrix = np.eye(4) rotation_matrix[:3, :3] = rot rotation_matrix[:3, 3] = trans # angle, dir, _ = rotation_from_matrix(rotation_matrix) # current_quaternion = np.array([angle]+dir.tolist())# # # I need this calculations for the new reward function, need to send them back to the run mara or calculate them here # current_quaternion = quaternion_from_matrix(rotation_matrix) # tgt_quartenion = quaternion_from_matrix(self.target_orientation) current_quaternion = tf.quaternions.mat2quat(rot) #[w, x, y ,z] tgt_quartenion = tf.quaternions.mat2quat(self.target_orientation) # A = np.vstack([current_quaternion, np.ones(len(current_quaternion))]).T #quat_error = np.linalg.lstsq(A, tgt_quartenion)[0] quat_error = current_quaternion * tgt_quartenion.conjugate() vec, angle = tf.quaternions.quat2axangle(quat_error) rot_vec_err = [vec[0], vec[1], vec[2], np.float64(angle)] # convert quat to np arrays # quat_error = np.asarray(quat_error, dtype=np.quaternion).view((np.double, 4)) # RK: revisit this later, we only take one angle difference here! # angle_diff = 2 * np.arccos(np.clip(quat_error[..., 0], -1., 1.)) current_ee_tgt = np.ndarray.flatten( get_ee_points(self.environment['end_effector_points'], trans, rot).T) ee_points = current_ee_tgt - self.realgoal #self.environment['ee_points_tgt'] ee_points_jac_trans, _ = self.get_ee_points_jacobians( ee_link_jacobians, self.environment['end_effector_points'], rot) ee_velocities = self.get_ee_points_velocities( ee_link_jacobians, self.environment['end_effector_points'], rot, last_observations) # Concatenate the information that defines the robot state # vector, typically denoted asrobot_id 'x'. state = np.r_[np.reshape(last_observations, -1), np.reshape(ee_points, -1), np.reshape(rot_vec_err, -1), np.reshape(ee_velocities, -1), ] return state
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name #gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_v0.launch") gazebo_env.GazeboEnv.__init__(self, "ModularScara3_Obstacles_urdf_simplified_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._collision_msg = None ### self._torque_msg = None ### self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # limit the max episode step # class variable that iterates to accounts for number of steps per episode self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.mod = 100 self.init_torque = 0.0 self.init_torque_array = np.zeros(3) self.ee_point_matrix = [] ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3349774, 0.1570571, 0.26342]) # 26342 center of S # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) # center of O # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # make sure that the slowness is set properly!! In case we forget them defaults to seconds. # if self.slowness is None: # slowness = 1 # # else: # slowness = self.slowness # slowness = 100000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' MOTOR3_TORQUE_SUBSCRIBER = '/motor3_torque' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base' # BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' # SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' # SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' # SCARA_BAR_MOTOR1 = 'scara_e1_bar1' # SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' # SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' # SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' # SCARA_BAR_MOTOR2 = 'scara_e1_bar2' # SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' # SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' # SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' # SCARA_BAR_MOTOR3 = 'scara_e1_bar3' # SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [SCARA_MOTOR1,SCARA_MOTOR2, SCARA_MOTOR3, SCARA_RANGEFINDER, EE_LINK] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if(prefix_path==None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_3joints_simplified.urdf" #URDF_PATH = prefix_path + "/urdf/scara_e1_3joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # self._sub_coll = rospy.Subscriber('/gazebo_contacts',ContactState, self.collision_callback) ## self._sub_torque = rospy.Subscriber('/motor3_torque',WrenchStamped, self.torque_callback) ## # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. # print("self.environment['link_names'][0]:", self.environment['link_names'][0]) # print("self.environment['link_names'][-1]:", self.environment['link_names'][-1]) self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] print("nr of joints: ", self.scara_chain.getNrOfJoints()) # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() #low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) #high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0,0]; pose.position.y = EE_POS_TGT[0,1]; pose.position.z = 0.055#EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0; pose.orientation.y= 0; pose.orientation.z = 0; pose.orientation.w = 0; reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment self._seed()
def __init__(self): self.slowness = 1 self.slowness_unit = 'sec' # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ARIACPick_v0.launch") JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # Set constants for joints SHOULDER_PAN_JOINT = 'shoulder_pan_joint' SHOULDER_LIFT_JOINT = 'shoulder_lift_joint' ELBOW_JOINT = 'elbow_joint' WRIST_1_JOINT = 'wrist_1_joint' WRIST_2_JOINT = 'wrist_2_joint' WRIST_3_JOINT = 'wrist_3_joint' # Set constants for links BASE = 'base' BASE_LINK = 'base_link' SHOULDER_LINK = 'shoulder_link' UPPER_ARM_LINK = 'upper_arm_link' FOREARM_LINK = 'forearm_link' WRIST_1_LINK = 'wrist_1_link' WRIST_2_LINK = 'wrist_2_link' WRIST_3_LINK = 'wrist_3_link' EE_LINK = 'ee_link' JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT, WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT] # JOINT_ORDER= ['linear_arm_actuator_joint', 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint'] LINK_NAMES = [BASE, BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK, WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK, EE_LINK] LINK_NAMES2 = [ BASE, BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK, WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK, EE_LINK ] EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.3746]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) INITIAL_JOINTS = np.array([0., 0., 0.]) STEP_COUNT = 2 # Typically 100. reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/erle/ros_scara/ur10.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten(get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } rospy.wait_for_service('/ariac/gripper/control') self.vacuum_gripper_control = rospy.ServiceProxy('/ariac/gripper/control', VacuumGripperControl) self._pub = rospy.Publisher('/ariac/arm/command', JointTrajectory) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain(self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self.step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = self.scara_chain.getNrOfJoints() + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi/2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed()
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "ModularScara4_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 100 # this is specific parameter for the acktr algorithm. Not used in ppo1, trpo... self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.4868]) # center of O EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([2.0, -2.0, -2.0, 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. slowness = 10000000 # 1 is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' # Set constants for links BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' SCARA_MOTOR4 = 'scara_e1_motor4' SCARA_INSIDE_MOTOR4 = 'scara_e1_motor4_inside' SCARA_SUPPORT_MOTOR4 = 'scara_e1_motor4_support' SCARA_BAR_MOTOR4 = 'scara_e1_bar4' SCARA_FIXBAR_MOTOR4 = 'scara_e1_fixbar4' SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, SCARA_BAR_MOTOR3, SCARA_FIXBAR_MOTOR3, SCARA_MOTOR4, SCARA_INSIDE_MOTOR4, SCARA_SUPPORT_MOTOR4, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/modular_scara/scara_e1_4joints.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # # Gazebo specific services to start/stop its behavior and # # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # Seed the environment self._seed()
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "NewArticulatedArm_v1.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.2, 0.2, 0.2868]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' # Set constants for links BASE = 'new_arm_base_link' NEW_ART_MOTOR1 = 'motor1_link' NEW_ART_MOTOR2 = 'link1' NEW_ART_MOTOR3 = 'motor3_link' NEW_ART_MOTOR4 = 'motor4_link' NEW_ART_MOTOR5 = 'motor5_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT ] LINK_NAMES = [ BASE, NEW_ART_MOTOR1, NEW_ART_MOTOR2, NEW_ART_MOTOR3, NEW_ART_MOTOR4, NEW_ART_MOTOR5 ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = "/home/rkojcev/devel/ros_rl/environments/gym-gazebo/gym_gazebo/envs/assets/urdf/new_articulated/new_arm.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/new_arm_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/new_arm_controller/state', JointTrajectoryControllerState, self.observation_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 6 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # # Gazebo specific services to start/stop its behavior and # # facilitate the overall RL environment # self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) # self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) # self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) # model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") #self.addObstacle() # Seed the environment # Seed the environment self._seed()
def __init__(self): """ Initialize the SCARA environemnt NOTE: This environment uses ROS and interfaces. TODO: port everything to ROS 2 natively """ # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "MARATop6DOF_Collision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None self.max_episode_steps = 1000 # now used in all algorithms self.iterator = 0 # default to seconds self.slowness = 1 self.slowness_unit = 'sec' self.reset_jnts = True self._collision_msg = None self._time_lock = threading.RLock() ############################# # Environment hyperparams ############################# # target, where should the agent reach # EE_POS_TGT = np.asmatrix([-0.40028, 0.095615, 0.72466]) # alex2 EE_POS_TGT = np.asmatrix([-0.580238, -0.179591, 0.72466]) # rubik touching the bar # EE_ROT_TGT = np.asmatrix([[-0.00128296, 0.9999805 , 0.00611158], # [ 0.00231397, -0.0061086 , 0.99997867], # [ 0.9999965 , 0.00129708, -0.00230609]]) # EE_POS_TGT = np.asmatrix([-0.390768, 0.0101776, 0.725335]) # 200 cm from the z axis # EE_POS_TGT = np.asmatrix([0.0, 0.001009, 1.64981]) # EE_POS_TGT = np.asmatrix([-0.4023037912211465, 0.15501116706606247, 0.7238499613771884]) # 200 cm from the z axis # # EE_POS_TGT = np.asmatrix([0.3305805, -0.1326121, 0.4868]) # center of the H # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[-0.99521107, 0.09689605, -0.01288708], # [-0.09768035, -0.99077857, 0.09389558], # [-0.00367013, 0.09470474, 0.99549864]]) # EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_ROT_TGT = np.asmatrix([[0.79660969, -0.51571238, 0.31536287], [0.51531424, 0.85207952, 0.09171542], [-0.31601302, 0.08944959, 0.94452874]]) # original orientation EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position # INITIAL_JOINTS = np.array([0., 0., 1., 0., 1.57, 0.]) INITIAL_JOINTS = np.array([0., 0., 0., 0., 0., 0.]) # Used to initialize the robot, #TODO, clarify this more # STEP_COUNT = 2 # Typically 100. # slowness = 10000000 # 10 ms, where 1 second is real life simulation # slowness = 1000000 # 1 ms, where 1 second is real life simulation # slowness = 1 # use >10 for running trained network in the simulation # slowness = 10 # use >10 for running trained network in the simulation # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/mara_controller/command' JOINT_SUBSCRIBER = '/mara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' MOTOR4_JOINT = 'motor4' MOTOR5_JOINT = 'motor5' MOTOR6_JOINT = 'motor6' # Set constants for links TABLE = 'table' BASE = 'base_link' MARA_MOTOR1_LINK = 'motor1_link' MARA_MOTOR2_LINK = 'motor2_link' MARA_MOTOR3_LINK = 'motor3_link' MARA_MOTOR4_LINK = 'motor4_link' MARA_MOTOR5_LINK = 'motor5_link' MARA_MOTOR6_LINK = 'motor6_link' EE_LINK = 'ee_link' # EE_LINK = 'ee_link' JOINT_ORDER = [ MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT, MOTOR4_JOINT, MOTOR5_JOINT, MOTOR6_JOINT ] LINK_NAMES = [ TABLE, BASE, MARA_MOTOR1_LINK, MARA_MOTOR2_LINK, MARA_MOTOR3_LINK, MARA_MOTOR4_LINK, MARA_MOTOR5_LINK, MARA_MOTOR6_LINK, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# # TODO: fix this and make it relative # Set the path of the corresponding URDF file from "assets" URDF_PATH = rospkg.RosPack().get_path( "mara_description") + "/urdf/mara_demo_camera_top.urdf" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.realgoal = ee_tgt self.target_orientation = ee_rot_tgt self.environment = { # rk changed this to for the mlsh # 'ee_points_tgt': ee_tgt, 'ee_points_tgt': self.realgoal, 'ee_point_tgt_orient': self.target_orientation, 'joint_order': m_joint_order, 'link_names': m_link_names, # 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher(JOINT_PUBLISHER, JointTrajectory) self._sub = rospy.Subscriber(JOINT_SUBSCRIBER, JointTrajectoryControllerState, self.observation_callback) self._sub_coll = rospy.Subscriber('/gazebo_contacts', ContactState, self.collision_callback) # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # observation = self.take_observation() # assert not done # self.obs_dim = observation.size """ obs_dim is defined as: num_dof + end_effector_points=3 + end_effector_velocities=3 end_effector_points and end_effector_velocities is constant and equals 3 recently also added quaternion to the obs, which has dimension=4 """ # self.obs_dim = self.scara_chain.getNrOfJoints( ) + 7 #6 hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.pi * np.ones(self.scara_chain.getNrOfJoints()) # high = np.pi * np.ones(self.scara_chain.getNrOfJoints()) # low = -np.inf * np.ones(self.scara_chain.getNrOfJoints()) # high = np.inf * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) self.remove_model = rospy.ServiceProxy('/gazebo/delete_model', DeleteModel) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.reset_world = rospy.ServiceProxy('/gazebo/reset_world', Empty) model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = EE_POS_TGT[0, 2] #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="") # Seed the environment # Seed the environment self._seed()
def __init__(self): # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "Box_Vision_v0.launch") # TODO: cleanup this variables, remove the ones that aren't used # class variables self._observation_msg = None self._camera_image = None self.scale = None # must be set from elsewhere based on observations self.bias = None self.x_idx = None self.obs = None self.reward = None self.done = None self.reward_dist = None self.reward_ctrl = None self.action_space = None ############################# # Environment hyperparams ############################# # target, where should the agent reach EE_POS_TGT = np.asmatrix([0.3325683, 0.0657366, 0.3746]) EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) EE_POINTS = np.asmatrix([[0, 0, 0]]) EE_VELOCITIES = np.asmatrix([[0, 0, 0]]) # Initial joint position INITIAL_JOINTS = np.array([0, 0, 0]) # Used to initialize the robot, #TODO, clarify this more STEP_COUNT = 2 # Typically 100. # # Set the number of seconds per step of a sample. # TIMESTEP = 0.01 # Typically 0.01. # # Set the number of timesteps per sample. # STEP_COUNT = 100 # Typically 100. # # Set the number of samples per condition. # SAMPLE_COUNT = 5 # Typically 5. # # Set the number of trajectory iterations to collect. # ITERATIONS = 20 # Typically 10. # How much time does it take to execute the trajectory (in seconds) slowness = 2 # Topics for the robot publisher and subscriber. JOINT_PUBLISHER = '/scara_controller/command' JOINT_SUBSCRIBER = '/scara_controller/state' # joint names: MOTOR1_JOINT = 'motor1' MOTOR2_JOINT = 'motor2' MOTOR3_JOINT = 'motor3' # Set constants for links WORLD = "world" BASE = 'scara_e1_base_link' BASE_MOTOR = 'scara_e1_base_motor' # SCARA_MOTOR1 = 'scara_e1_motor1' SCARA_INSIDE_MOTOR1 = 'scara_e1_motor1_inside' SCARA_SUPPORT_MOTOR1 = 'scara_e1_motor1_support' SCARA_BAR_MOTOR1 = 'scara_e1_bar1' SCARA_FIXBAR_MOTOR1 = 'scara_e1_fixbar1' # SCARA_MOTOR2 = 'scara_e1_motor2' SCARA_INSIDE_MOTOR2 = 'scara_e1_motor2_inside' SCARA_SUPPORT_MOTOR2 = 'scara_e1_motor2_support' SCARA_BAR_MOTOR2 = 'scara_e1_bar2' SCARA_FIXBAR_MOTOR2 = 'scara_e1_fixbar2' # SCARA_MOTOR3 = 'scara_e1_motor3' SCARA_INSIDE_MOTOR3 = 'scara_e1_motor3_inside' SCARA_SUPPORT_MOTOR3 = 'scara_e1_motor3_support' SCARA_BAR_MOTOR3 = 'scara_e1_bar3' SCARA_FIXBAR_MOTOR3 = 'scara_e1_fixbar3' # SCARA_RANGEFINDER = 'scara_e1_rangefinder' EE_LINK = 'ee_link' JOINT_ORDER = [MOTOR1_JOINT, MOTOR2_JOINT, MOTOR3_JOINT] LINK_NAMES = [ BASE, BASE_MOTOR, SCARA_MOTOR1, SCARA_INSIDE_MOTOR1, SCARA_SUPPORT_MOTOR1, SCARA_BAR_MOTOR1, SCARA_FIXBAR_MOTOR1, SCARA_MOTOR2, SCARA_INSIDE_MOTOR2, SCARA_SUPPORT_MOTOR2, SCARA_BAR_MOTOR2, SCARA_FIXBAR_MOTOR2, SCARA_MOTOR3, SCARA_INSIDE_MOTOR3, SCARA_SUPPORT_MOTOR3, EE_LINK ] reset_condition = { 'initial_positions': INITIAL_JOINTS, 'initial_velocities': [] } ############################# ############################# rospack = rospkg.RosPack() prefix_path = rospack.get_path('scara_e1_description') if (prefix_path == None): print("I can't find scara_e1_description") sys.exit(0) URDF_PATH = prefix_path + "/urdf/scara_e1_model_3joints_simplified_camera_behind.urdf.xacro" m_joint_order = copy.deepcopy(JOINT_ORDER) m_link_names = copy.deepcopy(LINK_NAMES) m_joint_publishers = copy.deepcopy(JOINT_PUBLISHER) m_joint_subscribers = copy.deepcopy(JOINT_SUBSCRIBER) ee_pos_tgt = EE_POS_TGT ee_rot_tgt = EE_ROT_TGT # Initialize target end effector position ee_tgt = np.ndarray.flatten( get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T) self.environment = { # 'dt': TIMESTEP, 'T': STEP_COUNT, 'ee_points_tgt': ee_tgt, 'joint_order': m_joint_order, 'link_names': m_link_names, 'slowness': slowness, 'reset_conditions': reset_condition, 'tree_path': URDF_PATH, 'joint_publisher': m_joint_publishers, 'joint_subscriber': m_joint_subscribers, 'end_effector_points': EE_POINTS, 'end_effector_velocities': EE_VELOCITIES, # 'num_samples': SAMPLE_COUNT, } # self.spec = {'timestep_limit': 5, 'reward_threshold': 950.0,} # Subscribe to the appropriate topics, taking into account the particular robot # ROS 1 implementation self._pub = rospy.Publisher('/scara_controller/command', JointTrajectory) self._sub = rospy.Subscriber('/scara_controller/state', JointTrajectoryControllerState, self.observation_callback) self._image_sub = rospy.Subscriber('/scara/camera/image_raw', Image, self.camera_callback) #self._sub = rospy.Subscriber('/scara/camera', JointTrajectoryControllerState, self.observation_callback) # # ROS 2 implementation, includes initialization of the appropriate ROS 2 abstractions # rclpy.init(args=None) # self.ros2_node = rclpy.create_node('robot_ai_node') # self._pub = ros2_node.create_publisher(JointTrajectory, JOINT_PUBLISHER) # # self._callbacks = partial(self.observation_callback, robot_id=0) # self._sub = ros2_node.create_subscription(JointTrajectoryControllerState, JOINT_SUBSCRIBER, self.observation_callback, qos_profile=qos_profile_sensor_data) # # self._time_lock = threading.RLock() # Initialize a tree structure from the robot urdf. # note that the xacro of the urdf is updated by hand. # The urdf must be compiled. _, self.ur_tree = treeFromFile(self.environment['tree_path']) # Retrieve a chain structure between the base and the start of the end effector. self.scara_chain = self.ur_tree.getChain( self.environment['link_names'][0], self.environment['link_names'][-1]) # print("nr of jnts: ", self.scara_chain.getNrOfJoints()) # Initialize a KDL Jacobian solver from the chain. self.jac_solver = ChainJntToJacSolver(self.scara_chain) #print(self.jac_solver) self._observations_stale = [False for _ in range(1)] #print("after observations stale") self._currently_resetting = [False for _ in range(1)] self.reset_joint_angles = [None for _ in range(1)] # TODO review with Risto, we might need the first observation for calling _step() # # taken from mujoco in OpenAi how to initialize observation space and action space. # observation, _reward, done, _info = self._step(np.zeros(self.scara_chain.getNrOfJoints())) # assert not done # self.obs_dim = observation.size self.obs_dim = 9 # hardcode it for now # # print(observation, _reward) # # Here idially we should find the control range of the robot. Unfortunatelly in ROS/KDL there is nothing like this. # # I have tested this with the mujoco enviroment and the output is always same low[-1.,-1.], high[1.,1.] # #bounds = self.model.actuator_ctrlrange.copy() low = -np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) high = np.pi / 2.0 * np.ones(self.scara_chain.getNrOfJoints()) # print("Action spaces: ", low, high) self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # self.action_space = spaces.Discrete(3) #F,L,R # self.reward_range = (-np.inf, np.inf) # Gazebo specific services to start/stop its behavior and # facilitate the overall RL environment self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) self.add_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # Seed the environment self._seed() model_xml = "<?xml version=\"1.0\"?> \ <robot name=\"myfirst\"> \ <link name=\"world\"> \ </link>\ <link name=\"cylinder0\">\ <visual>\ <geometry>\ <sphere radius=\"0.01\"/>\ </geometry>\ <origin xyz=\"0 0 0\"/>\ <material name=\"rojotransparente\">\ <ambient>0.5 0.5 1.0 0.1</ambient>\ <diffuse>0.5 0.5 1.0 0.1</diffuse>\ </material>\ </visual>\ <inertial>\ <mass value=\"5.0\"/>\ <inertia ixx=\"1.0\" ixy=\"0.0\" ixz=\"0.0\" iyy=\"1.0\" iyz=\"0.0\" izz=\"1.0\"/>\ </inertial>\ </link>\ <joint name=\"world_to_base\" type=\"fixed\"> \ <origin xyz=\"0 0 0\" rpy=\"0 0 0\"/>\ <parent link=\"world\"/>\ <child link=\"cylinder0\"/>\ </joint>\ <gazebo reference=\"cylinder0\">\ <material>Gazebo/GreenTransparent</material>\ </gazebo>\ </robot>" robot_namespace = "" pose = Pose() pose.position.x = EE_POS_TGT[0, 0] pose.position.y = EE_POS_TGT[0, 1] pose.position.z = 0.055 #EE_POS_TGT[0,2]; #Static obstacle (not in original code) # pose.position.x = 0.25;# # pose.position.y = 0.07;# # pose.position.z = 0.0;# pose.orientation.x = 0 pose.orientation.y = 0 pose.orientation.z = 0 pose.orientation.w = 0 reference_frame = "" rospy.wait_for_service('/gazebo/spawn_urdf_model') self.add_model(model_name="target", model_xml=model_xml, robot_namespace="", initial_pose=pose, reference_frame="")