def collect_data(rgb_dir="/home/ljt/Desktop/images/rgb/",
                 dep_dir="/home/ljt/Desktop/images/dep/",
                 image_size=(224, 224)):
    """Capture MAX_PICTURE_NUM RGB/depth image pairs of the demo cube.

    On every iteration the cube is reset with ``rand=True``, its pose is
    read back, one RGB/depth pair is taken from the robot, both images are
    resized, and they are written to disk.  The file name of each sample is
    ``str(pose)``, so the pose doubles as the label of the sample:

    * RGB image   -> ``<rgb_dir>/<pose>.png`` (via ``cv2.imwrite``)
    * depth image -> ``<dep_dir>/<pose>.npy`` (via ``np.save``, which
      appends the ``.npy`` suffix itself)

    Args:
        rgb_dir: Directory receiving the PNG files.  Defaults to the
            location the original script used.
        dep_dir: Directory receiving the ``.npy`` depth files.  Defaults to
            the location the original script used.
        image_size: Target size handed to ``cv2.resize`` as ``dsize``
            (OpenCV interprets it as ``(width, height)``).

    Note:
        Two samples with an identical pose string overwrite each other.
    """
    # Function-scope imports keep this block self-contained; the module's
    # import header is not visible from here.
    import os
    import warnings

    # cv2.imwrite only returns False when the target directory is missing,
    # so make sure both output directories exist before capturing anything.
    # (os.makedirs(..., exist_ok=True) is avoided to stay Python 2 friendly.)
    for out_dir in (rgb_dir, dep_dir):
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    dsize = tuple(image_size)
    robot = Robot()
    cubes_manager = CubesManager()

    for _ in range(MAX_PICTURE_NUM):
        cubes_manager.reset_cube(rand=True)
        box_position = cubes_manager.read_cube_pose("demo_cube")
        sample_name = str(box_position)

        # The returned state is not needed here.  The call is retained
        # because the original made it before grabbing the images and it
        # may have side effects -- TODO confirm and drop if it has none.
        robot.get_state()
        rgb, dep = robot.get_rgb_dep()

        rgb = cv2.resize(rgb, dsize)
        dep = cv2.resize(dep, dsize)

        rgb_path = os.path.join(rgb_dir, sample_name + ".png")
        # imwrite signals failure through its return value instead of
        # raising; surface it so a broken run does not go unnoticed.
        if not cv2.imwrite(rgb_path, rgb):
            warnings.warn("cv2.imwrite failed to write " + rgb_path)

        # Depth is stored as a raw array rather than an image file.
        np.save(os.path.join(dep_dir, sample_name), dep)
now_dis = math.sqrt(math.pow(start_position.x - robot.Box_position[0], 2) + math.pow(start_position.y - robot.Box_position[1], 2)) # 存储夹爪距离木块的距离 robot.dis = now_dis # + math.pow(now_position.z - robot.Box_position[2], 2)) # 存储end_goal robot.end_goal = [start_position.x, start_position.y, start_position.z] s = robot.get_state() if i % 500 == 0: print "********memory counter:{0}********".format(rl.memory_counter) end = time.clock() print end-begin begin = time.clock() # 分成末端坐标和rgbd endg, view_state = s rgb, dep = robot.get_rgb_dep() for j in range(1, MAX_EP_STEPS): st += 1 a = rl.choose_action([endg, view_state]) # choose 时沿用之前的图像 s_, r, done = robot.test_step(a) # 执行一步 rl.store_transition(s, a, -r, [s_, view_state]) # 沿用之前的图像rgbd # print "the memory counter:", rl.memory_counter if rl.memory_counter > 5000: # if rl.memory_counter % 500 == 0: print "learn....." rl.learn() rw += r if done or st >= MAX_EP_STEPS-1: print("total reward:{0}, average reward:{1}\n".format(rw, rw*1.0/st)) break if i % 500 == 0: