def collect_data(num_pictures=None,
                 output_dir="/home/ljt/Desktop/images",
                 image_size=(224, 224)):
    """Capture RGB/depth image pairs of a randomly placed cube and save them.

    For every sample the cube named "demo_cube" is re-placed with
    ``reset_cube(rand=True)``, its pose is read back, and the robot's RGB
    and depth images are resized and written to disk.  The string form of
    the cube pose is used as the file name of both outputs, so two samples
    with an identical pose string overwrite each other:

    * ``<output_dir>/rgb/<pose>.png`` -- RGB image, via ``cv2.imwrite``
    * ``<output_dir>/dep/<pose>.npy`` -- depth array, via ``np.save``
      (``np.save`` appends the ``.npy`` suffix itself)

    Args:
        num_pictures: Number of samples to record.  ``None`` (the default)
            falls back to the module-level ``MAX_PICTURE_NUM``.
        output_dir: Directory that receives the ``rgb`` and ``dep``
            sub-directories; both are created when missing.
        image_size: ``(width, height)`` handed to ``cv2.resize`` for both
            the RGB and the depth image.

    Raises:
        IOError: If ``cv2.imwrite`` reports that the RGB image could not
            be written.
    """
    import os  # local import: only this function needs it

    if num_pictures is None:
        num_pictures = MAX_PICTURE_NUM
    image_size = tuple(image_size)

    # cv2.imwrite does not create missing directories and only signals the
    # failure through its return value, so make sure the targets exist.
    rgb_dir = os.path.join(output_dir, "rgb")
    dep_dir = os.path.join(output_dir, "dep")
    for directory in (rgb_dir, dep_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    robot = Robot()
    cube_manager = CubesManager()

    for _ in range(num_pictures):
        cube_manager.reset_cube(rand=True)
        box_position = cube_manager.read_cube_pose("demo_cube")
        sample_name = str(box_position)

        # The returned state is not used here; the call is kept because it
        # may refresh the robot's sensor data -- TODO confirm, drop if not.
        robot.get_state()
        rgb, dep = robot.get_rgb_dep()

        rgb = cv2.resize(rgb, image_size)
        dep = cv2.resize(dep, image_size)

        rgb_path = os.path.join(rgb_dir, sample_name + ".png")
        if not cv2.imwrite(rgb_path, rgb):
            # Fail loudly: otherwise the depth file below would be saved
            # without its matching RGB image.
            raise IOError("failed to write RGB image: " + rgb_path)

        # Depth is stored as a raw array rather than an image file.
        np.save(os.path.join(dep_dir, sample_name), dep)
# start training for i in range(MAX_EPISODES): cubm.reset_cube(rand=True) Box_position = cubm.read_cube_pose("demo_cube") print "cube position:", Box_position robot.Box_position = copy.deepcopy(Box_position) now_position = robot.gripper.get_current_pose( "gripper_link").pose.position now_dis = math.sqrt( math.pow(now_position.x - robot.Box_position[0], 2) + math.pow(now_position.y - robot.Box_position[1], 2) + math.pow(now_position.z - robot.Box_position[2], 2)) robot.reward = -10 * now_dis robot.reset() s = robot.get_state() ep_r = 0. # reward of each epoch for j in range(MAX_EP_STEPS): a = rl.choose_action(s) s_, r, done = robot.step(a) number += 1 print "-------the %i step-------" % number rl.store_transition(s, a, r, s_) # print s_[0] ep_r += r if rl.memory_full: # start to learn once has fulfilled the memory rl.learn() # rl.learn()