import random

import numpy as np
import rospy

import baxter_interface

# Action, Reward, and block_sample are project-local modules: Action
# generates joint-position updates, Reward scores the gripper pose against a
# goal, and block_sample manages the Gazebo models. The import paths below
# are assumptions; adjust them to match the project layout.
import block_sample
from action import Action
from reward import Reward

left_pos = action.left_param_action([0., 0., 0., 0., 0., 0., 0.])
# move the joints to the new positions.
limb_right.move_to_joint_positions(right_pos)
limb_left.move_to_joint_positions(left_pos)
# update the current joint angles (could do this by using the positions
# calculated above; concern: there may be variability with movement,
# especially if the robot is holding a weighty object; might need to
# reconsider to speed up processing)
# DONE: determine if getting updates significantly slows processing and if
# there is a difference from the calculated angles
action.action_update(limb_right.joint_angles(), limb_left.joint_angles())
# get the endpoint pose for reward calculation
right_reward.update_gripper(limb_right.endpoint_pose())
left_reward.update_gripper(limb_left.endpoint_pose())
# get the rewards and status
right_distance, right_done = right_reward.euclidean_distance()
left_distance, left_done = left_reward.euclidean_distance()
print "Reward for iteration " + str(i)
print right_distance
print left_distance
if left_done or right_done:
    # block_sample.delete_gazebo_models()
    quit()

block_sample.delete_gazebo_models()
quit()
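
# The Reward class itself is project-local and not shown in this file. As a
# minimal sketch, the distance computation it presumably performs looks like
# the function below, assuming endpoint_pose() returns a dict with a
# 'position' named tuple (as baxter_interface's Limb does) and the goal is an
# (x, y, z) tuple. The function name and done threshold are hypothetical,
# for illustration only; the Baxter class below splits the same logic across
# euclidean_distance() and is_done().
def gripper_goal_distance(endpoint_pose, goal, done_threshold=0.05):
    pos = endpoint_pose['position']  # named tuple with x, y, z fields
    dx = pos.x - goal[0]
    dy = pos.y - goal[1]
    dz = pos.z - goal[2]
    distance = (dx ** 2 + dy ** 2 + dz ** 2) ** 0.5
    # the episode is done once the gripper is within the threshold of the goal
    return distance, distance < done_threshold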
class Baxter(object):

    def __init__(self):
        # initialize baxter node in ROS
        rospy.init_node('baxter_node')
        # create instances of baxter_interface's Limb class
        self.limb_right = baxter_interface.Limb('right')
        self.limb_left = baxter_interface.Limb('left')
        # load the gazebo model for simulation
        block_sample.load_gazebo_models()
        # create a new action for both arms
        self.action = Action(self.limb_right.joint_angles(),
                             self.limb_left.joint_angles())
        self.neutral()
        # initialize the reward with the goal location; the second parameter
        # is the block location used in block_sample
        # TODO add a 2nd block and have each hand work towards its own goal
        # TODO use input from robot vision to get the goal position
        self.right_reward = Reward(self.limb_right.endpoint_pose(),
                                   (0.6725, 0.1265, 0.7825))
        self.left_reward = Reward(self.limb_left.endpoint_pose(),
                                  (0.6725, 0.1265, 0.7825))

    def neutral(self):
        # set both arms to a neutral position
        self.limb_right.move_to_joint_positions(
            self.action.right_home_position())
        self.limb_left.move_to_joint_positions(
            self.action.left_home_position())

    def neutral_left(self):
        # set the left arm to a neutral position
        self.limb_left.move_to_joint_positions(
            self.action.left_home_position())

    def neutral_right(self):
        # set the right arm to a neutral position
        self.limb_right.move_to_joint_positions(
            self.action.right_home_position())

    def step_left(self, action):
        start_pos = self.left_state()
        # get an incremental update to the joint positions
        left_pos = self.action.left_param_action(action)
        # move the joints to the new positions
        self.limb_left.move_to_joint_positions(left_pos)
        # update the current state of the robot
        self.action.action_update(self.right_state(), self.left_state())
        # get the endpoint pose for the reward calculation
        self.left_reward.update_gripper(self.limb_left.endpoint_pose())
        reward = self.left_reward.euclidean_distance()
        done = self.left_reward.is_done(reward)
        # return the reward and status
        # print reward
        return start_pos, left_pos, reward, done

    def step_right(self, action):
        start_pos = self.right_state()
        # get an incremental update to the joint positions
        right_pos = self.action.right_param_action(action)
        # move the joints to the new positions
        self.limb_right.move_to_joint_positions(right_pos)
        # update the current state of the robot
        self.action.action_update(self.right_state(), self.left_state())
        # get the endpoint pose for the reward calculation
        self.right_reward.update_gripper(self.limb_right.endpoint_pose())
        reward = self.right_reward.euclidean_distance()
        done = self.right_reward.is_done(reward)
        # return the rewards and status
        return start_pos, right_pos, reward, done

    def random_step_left(self):
        init_arm_pos = self.left_state()
        # copy so the initial joint angles are not mutated by the update below
        new_arm_pos = dict(init_arm_pos)
        choices = [-0.1, 0, 0.1]
        arm_update_value = [random.choice(choices) for _ in range(7)]
        for item, key in enumerate(new_arm_pos):
            # set the new arm position to the initial position plus a random offset
            new_arm_pos[key] = init_arm_pos[key] + arm_update_value[item]
        self.limb_left.move_to_joint_positions(new_arm_pos)
        self.action.action_update(self.right_state(), self.left_state())
        self.left_reward.update_gripper(self.limb_left.endpoint_pose())
        reward = self.left_reward.euclidean_distance()
        done = self.left_reward.is_done(reward)
        return new_arm_pos, arm_update_value, reward, done

    def random_step_right(self):
        init_arm_pos = self.right_state()
        # copy so the initial joint angles are not mutated by the update below
        new_arm_pos = dict(init_arm_pos)
        choices = [-0.1, 0, 0.1]
        arm_update_value = [random.choice(choices) for _ in range(7)]
        for item, key in enumerate(new_arm_pos):
            # set the new arm position to the initial position plus a random offset
            new_arm_pos[key] = init_arm_pos[key] + arm_update_value[item]
        self.limb_right.move_to_joint_positions(new_arm_pos)
        self.action.action_update(self.right_state(), self.left_state())
        self.right_reward.update_gripper(self.limb_right.endpoint_pose())
        reward = self.right_reward.euclidean_distance()
        done = self.right_reward.is_done(reward)
        return new_arm_pos, arm_update_value, reward, done

    def right_state(self):
        # returns a dict of the current joint angles, keyed by joint name
        return self.limb_right.joint_angles()

    def left_state(self):
        # returns a dict of the current joint angles, keyed by joint name
        return self.limb_left.joint_angles()

    def close_env(self):
        block_sample.delete_gazebo_models()
        self.neutral()
        quit()

    def observation_space(self):
        # Baxter's 7 arm joint limits in radians: s0, s1, e0, e1, w0, w1, w2
        low = np.array(
            [-1.7016, -2.147, -3.0541, -0.05, -3.059, -1.5707, -3.059])
        high = np.array([1.7016, 1.047, 3.0541, 2.618, 3.059, 2.094, 3.059])
        return [low, high]

    def action_space(self):
        # one discrete choice set per joint: move by -0.1, 0, or 0.1 radians
        return [[-0.1, 0, 0.1] for _ in range(7)]

    def action_domain(self):
        low = -0.1  # may want to change this, e.g. a bigger value like 0.2
        none = 0.0
        high = 0.1
        return low, none, high

    def reset(self, arm):
        self.neutral()
        if arm == "right":
            state = self.right_state()
        elif arm == "left":
            state = self.left_state()
        else:
            print "invalid arm parameter: enter 'left' or 'right'"
            state = {}
        state_list = list(state.values())
        return state_list

    def reset_random(self, arm):
        if arm == "right":
            self.limb_right.move_to_joint_positions(
                self.action.right_random_position())
            state = self.right_state()
        elif arm == "left":
            self.limb_left.move_to_joint_positions(
                self.action.left_random_position())
            state = self.left_state()
        else:
            print "invalid arm parameter: enter 'left' or 'right'"
            state = {}
        state_list = list(state.values())
        return state_list

    def random_start(self):
        # set both arms to a random start position
        self.limb_right.move_to_joint_positions(
            self.action.right_random_position())
        self.limb_left.move_to_joint_positions(
            self.action.left_random_position())
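
# A minimal usage sketch: drive the environment with random steps until an
# episode finishes. The episode length of 100 and the use of the left arm
# are illustrative choices; in actual training, step_left() with actions
# chosen by a learned policy would replace random_step_left().
if __name__ == '__main__':
    env = Baxter()
    env.reset("left")
    for i in range(100):
        new_pos, update, reward, done = env.random_step_left()
        print "iteration " + str(i) + " reward: " + str(reward)
        if done:
            break
    env.close_env()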