def specify_throws(args, d, which):
    """ The user manually specifies how the throws should be done.

    Drag from the BACK to the FRONT, for intuition. The second click is where we
    want the needle tip to ENTER the phantom, and the first click is used to get
    the correct angle at which we want the needle to enter. Then we save the
    points, reset the list of points (that's why we need to fetch them with
    `global`), and do the same with the other camera image.

    BTW, if the user does this with a piece of paper that has lines, then we
    should just pull the piece of paper out once we have the correct desired
    points (if the paper is thin enough, the dVRK camera won't notice the height
    difference).
    """
    global BACK_POINTS, FRONT_POINTS, image
    if which == 'left':
        image = np.copy(d.left['raw'])
    elif which == 'right':
        image = np.copy(d.right['raw'])
    imdir = args.imdir + which

    num_throws = 0
    while num_throws < args.test:
        # Wrap the concatenation in parentheses so `.format()` applies to the
        # whole string, not just the last literal.
        window_name = ("Currently have {} throws out of {} planned. "
                       "Click and drag the NEXT direction, NOW. Or press ESC to "
                       "terminate the program.").format(num_throws, args.test)
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.setMouseCallback(window_name, click)  # Record clicks to this window!
        cv2.resizeWindow(window_name, 1800, 2600)
        U.call_wait_key(cv2.imshow(window_name, image))
        # Now save the image with the next available index.
        index = len(os.listdir(imdir))
        cv2.imwrite(imdir + "/point_" + str(index).zfill(2) + ".png", image)
        cv2.destroyAllWindows()
        num_throws += 1

    assert len(BACK_POINTS) == len(FRONT_POINTS) == num_throws
    info = {'num_throws': num_throws, 'back': BACK_POINTS, 'front': FRONT_POINTS}
    BACK_POINTS = []
    FRONT_POINTS = []
    return info
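
# A minimal sketch (not used by the code above) of how the BACK/FRONT click
# pairs collected by `specify_throws` could be turned into an entry pixel and
# an approach angle, following the drag semantics in the docstring. The helper
# name `throw_entry_and_angle` and the angle convention (degrees, measured in
# image coordinates) are assumptions, not part of the original pipeline.
import numpy as np  # may duplicate the module-level import

def throw_entry_and_angle(back_pt, front_pt):
    """Return (entry_pixel, angle_deg) for one back->front drag.

    `front_pt` is where the needle tip should enter; the back->front vector
    gives the direction of approach in image space.
    """
    back = np.asarray(back_pt, dtype=np.float64)
    front = np.asarray(front_pt, dtype=np.float64)
    direction = front - back
    angle_deg = np.degrees(np.arctan2(direction[1], direction[0]))
    return front, angle_deg

# Example usage with the dictionary returned by `specify_throws`:
#   info = specify_throws(args, d, which='left')
#   for b, f in zip(info['back'], info['front']):
#       entry, ang = throw_entry_and_angle(b, f)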
def test(arm1, fname, test_method):
    """ Roll out the recorded trajectory as an open-loop policy.

    test_method
    -----------
    0: roll entirely open-loop, no human intervention.
    1: roll each time step, then a human provides a correction (saves new file).
    """
    positions = U.load_pickle_to_list(fname)
    print("loaded {} positions from {}".format(len(positions), fname))
    if test_method == 0:
        print("We're going to run entirely open-loop.")
    elif test_method == 1:
        print("NOTE! You'll have to provide corrections after each movement.")
    revised_positions = []
    arm1.close_gripper()

    for i, (pos, rot) in enumerate(positions):
        U.move(arm1, pos, rot)
        arm1.close_gripper()
        real_pos, real_rot = U.get_pos_rot_from_arm(arm1, nparrays=True)
        print("\n({}) Target position: {},{}".format(i, pos, rot))
        print("    Actual position: {},{}".format(real_pos, real_rot))
        if test_method == 1:
            string = "Now correct the position as needed, then press any key," + \
                     " other than ESC (which will terminate the entire program)."
            U.call_wait_key(cv2.imshow(string, d.left_image), exit=True)
            revised_pos, revised_rot = U.get_pos_rot_from_arm(arm1, nparrays=True)
            revised_positions.append((revised_pos, revised_rot))
            print("    Revised position: {},{}".format(revised_pos, revised_rot))
        else:
            time.sleep(2)

    if test_method == 1:
        new_fname = fname[:-2] + '_revised.p'
        print("Storing {} positions in file {}".format(len(revised_positions), new_fname))
        U.store_pickle(new_fname, revised_positions)
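
# A minimal sketch (an assumed helper, not part of the original script) for
# comparing the open-loop targets with the human-revised positions that
# `test(..., test_method=1)` writes to the `*_revised.p` file. It only assumes
# both pickles hold lists of (position, rotation) tuples, as above; distances
# are reported in the arm's native position units.
import numpy as np

def summarize_corrections(orig_positions, revised_positions):
    """Print the mean/max positional correction applied by the human."""
    assert len(orig_positions) == len(revised_positions)
    deltas = [np.linalg.norm(np.asarray(p1) - np.asarray(p2))
              for (p1, _), (p2, _) in zip(orig_positions, revised_positions)]
    print("mean correction: {:.4f}, max correction: {:.4f}".format(
            np.mean(deltas), np.max(deltas)))

# Example (file names illustrative):
#   orig = U.load_pickle_to_list('traj.p')
#   revised = U.load_pickle_to_list('traj_revised.p')
#   summarize_corrections(orig, revised)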
def get_data(arm1, fname):
    """ Have the human press key to continue, escape to terminate. """
    key = -1
    positions = []
    stored = 0

    while True:
        key = U.call_wait_key(cv2.imshow(
                "ESC to exit and skip this; stored {} so far.".format(stored),
                d.left_image), exit=False)
        if key in U.ESC_KEYS:
            break
        pos, rot = U.get_pos_rot_from_arm(arm1, nparrays=True)
        positions.append((pos, rot))
        print("Appended: {}".format((pos, rot)))
        stored += 1

    if stored > 0:
        print("Storing {} positions in file {}".format(stored, fname))
        U.store_pickle(fname, positions)
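
# An assumed usage sketch tying `get_data` and `test` together: record a
# trajectory by moving the arm and pressing a key per waypoint, then replay it
# open-loop. The wrapper name and file name are illustrative only.
def record_then_replay(arm1, fname='traj_demo.p'):
    get_data(arm1, fname)               # human records waypoints
    test(arm1, fname, test_method=0)    # replay entirely open-loop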
def run(args, p, img_shape, save_path):
    """Run one episode, record statistics, etc."""
    stats = defaultdict(list)
    # Using 15 for vismpc and, to be fair, for DAgger if we also used 15 ...
    MAX_EP_LENGTH = 15
    if args.special:
        COVERAGE_SUCCESS = 1.50
    else:
        COVERAGE_SUCCESS = 0.92
    SS_THRESH = 0.95
    dumb_correction = False
    freq = 0

    for i in range(MAX_EP_LENGTH):
        print('\n*************************************')
        print('ON TIME STEP (I.E., ACTION) NUMBER {}'.format(i + 1))
        print('*************************************\n')

        # ----------------------------------------------------------------------
        # STEP 1: move to the tab with the camera, click ENTER. This will save
        # images, and also call the neural network code to produce an action.
        # Then, load the action.
        # ----------------------------------------------------------------------
        results = U.get_net_results()  # Results from the neural network.
        print('Waiting for one more result beyond the {} we have so far'.format(len(results)))
        while len(results) == i:
            time.sleep(1)
            results = U.get_net_results()
        assert len(results) >= i
        assert len(results) == i + 1, '{} vs {}, {}'.format(i, results, len(results))

        # ----------------------------------------------------------------------
        # STEP 2: load those images, using the same code as in the network
        # loading code. If coverage is high enough, exit now. Note: we load 56x56
        # for training but use 100x100 for computing coverage, as we have several
        # values that are heavily tuned for that.
        # ----------------------------------------------------------------------
        c_path_100x100 = join(C.DVRK_IMG_PATH, '{}-c_img_crop_proc.png'.format(str(i).zfill(3)))
        c_path = join(C.DVRK_IMG_PATH, '{}-c_img_crop_proc_56.png'.format(str(i).zfill(3)))
        d_path = join(C.DVRK_IMG_PATH, '{}-d_img_crop_proc_56.png'.format(str(i).zfill(3)))
        c_img_100x100 = cv2.imread(c_path_100x100)
        coverage = U.calculate_coverage(c_img_100x100)  # tuned for 100x100
        c_img = cv2.imread(c_path)
        d_img = cv2.imread(d_path)
        U.single_means(c_img, depth=False)
        U.single_means(d_img, depth=True)
        assert c_img.shape == d_img.shape == img_shape
        assert args.use_rgbd
        #img = np.dstack( (c_img, d_img[:,:,0]) )  # we don't call net code here

        # Ensures we save the final image in case we exit with high coverage.
        # Make sure it happens BEFORE the `break` below so we get final images.
        stats['coverage'].append(coverage)
        stats['c_img_100x100'].append(c_img_100x100)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)

        if coverage >= COVERAGE_SUCCESS:
            print('\nCOVERAGE SUCCESS: {:.3f} >= {:.3f}, exiting ...\n'.format(
                    coverage, COVERAGE_SUCCESS))
            break
        else:
            print('\ncurrent coverage: {:.3f}\n'.format(coverage))

        # Before Jan 2020, action selection happened later. Now it happens earlier.
        #print('  now wait a few seconds for network to run')
        #time.sleep(5)

        # ----------------------------------------------------------------------
        # STEP 3: show the output of the action to the human. HUGE ASSUMPTION:
        # that we can just look at the last item of the `results` list.
        # ----------------------------------------------------------------------
        action = np.loadtxt(results[-1])
        print('neural net says: {}'.format(action))
        stats['actions'].append(action)

        # ----------------------------------------------------------------------
        # STEP 3.5: only if we're not on the first action, if the current image
        # is too similar to the old one, move the target points closer towards
        # the center of the cloth plane. An approximation but likely 'good
        # enough'. It does assume the net would predict a similar action, though ...
        # ----------------------------------------------------------------------
        similar = False
        if i > 0:
            # AH! Go to -2 because I modified code to append (c_img,d_img) above.
            prev_c = stats['c_img'][-2]
            prev_d = stats['d_img'][-2]
            diff_l2_c = np.linalg.norm(c_img - prev_c) / np.prod(c_img.shape)
            diff_l2_d = np.linalg.norm(d_img - prev_d) / np.prod(d_img.shape)
            diff_ss_c = compare_ssim(c_img, prev_c, multichannel=True)
            diff_ss_d = compare_ssim(d_img[:, :, 0], prev_d[:, :, 0])
            print('\n  (c) diff L2: {:.3f}'.format(diff_l2_c))
            print('  (d) diff L2: {:.3f}'.format(diff_l2_d))
            print('  (c) diff SS: {:.3f}'.format(diff_ss_c))
            print('  (d) diff SS: {:.3f}\n'.format(diff_ss_d))
            stats['diff_l2_c'].append(diff_l2_c)
            stats['diff_l2_d'].append(diff_l2_d)
            stats['diff_ss_c'].append(diff_ss_c)
            stats['diff_ss_d'].append(diff_ss_d)

            # Apply action 'compression'? A 0.95 cutoff empirically works well.
            if diff_ss_c > SS_THRESH:
                freq += 1
                print('NOTE structural similarity exceeds {}'.format(SS_THRESH))
                similar = True
            #    if dumb_correction:
            #        action[0] = action[0] * (0.9 ** freq)
            #        action[1] = action[1] * (0.9 ** freq)
            #        print('revised action after \'dumb compression\': {}, freq {}'.format(
            #                action, freq))
            #    else:
            #        action = action_correction(action, freq, c_img_100x100)
            #else:
            #    freq = 0

        # Actually just use the above for recording statistics. Always check the action.
        action = action_correction(action, freq, c_img_100x100, similar=similar)

        # ----------------------------------------------------------------------
        # STEP 4. If the output would result in a dangerous position, the human
        # stops it by hitting the ESC key. Otherwise, press any other key to
        # continue. The human should NOT normally be using this !!
        # ----------------------------------------------------------------------
        title = '{} -- ESC TO CANCEL (Or if episode done)'.format(action)
        if args.use_rgbd:
            stacked_img = np.hstack((c_img, d_img))
            exit = U.call_wait_key(cv2.imshow(title, stacked_img))
        elif args.use_color:
            exit = U.call_wait_key(cv2.imshow(title, c_img))
        else:
            exit = U.call_wait_key(cv2.imshow(title, d_img))
        cv2.destroyAllWindows()
        if exit:
            print('Warning: why are we exiting here?')
            print('It should exit naturally due to (a) coverage or (b) time limits.')
            print('Make sure I clear any results that I do not want to record.')
            break

        # ----------------------------------------------------------------------
        # STEP 5: Watch the robot do its action. Terminate the script if the
        # resulting action makes things fail spectacularly.
        # ----------------------------------------------------------------------
        x = action[0]
        y = action[1]
        dx = action[2]
        dy = action[3]
        start_t = time.time()
        U.move_p_from_net_output(x, y, dx, dy,
                                 row_board=C.ROW_BOARD,
                                 col_board=C.COL_BOARD,
                                 data_square=C.DATA_SQUARE,
                                 p=p)
        elapsed_t = time.time() - start_t
        stats['act_time'].append(elapsed_t)
        print('Finished executing action in {:.2f} seconds.'.format(elapsed_t))

    # If we ended up using all actions above, we really need one more image.
    # Edit: need to handle the case when we exceeded coverage after the 9th action.
    if len(stats['c_img']) == MAX_EP_LENGTH and coverage < COVERAGE_SUCCESS:
        assert len(stats['coverage']) == MAX_EP_LENGTH, len(stats['coverage'])
        i = MAX_EP_LENGTH
        # Results from the neural network -- still used to check if we get a NEW image.
        results = U.get_net_results()
        print('Just get the very last image we need! (Have {} so far)'.format(len(results)))
        while len(results) == i:
            time.sleep(1)
            results = U.get_net_results()
        assert len(results) >= i
        assert len(results) == i + 1, '{} vs {}, {}'.format(i, results, len(results))

        # Similar loading as earlier.
        c_path_100x100 = join(C.DVRK_IMG_PATH, '{}-c_img_crop_proc.png'.format(str(i).zfill(3)))
        c_path = join(C.DVRK_IMG_PATH, '{}-c_img_crop_proc_56.png'.format(str(i).zfill(3)))
        d_path = join(C.DVRK_IMG_PATH, '{}-d_img_crop_proc_56.png'.format(str(i).zfill(3)))
        c_img_100x100 = cv2.imread(c_path_100x100)
        coverage = U.calculate_coverage(c_img_100x100)  # tuned for 100x100
        c_img = cv2.imread(c_path)
        d_img = cv2.imread(d_path)
        assert c_img.shape == d_img.shape == img_shape

        # Record final stats.
        stats['coverage'].append(coverage)
        stats['c_img_100x100'].append(c_img_100x100)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)
        print('(for full length episode) final coverage: {:.3f}'.format(coverage))

    # Final book-keeping and return statistics.
    stats['coverage'] = np.array(stats['coverage'])
    print('\nEPISODE DONE!')
    print('  coverage: {}'.format(stats['coverage']))
    print('  len(coverage): {}'.format(len(stats['coverage'])))
    print('  len(c_img): {}'.format(len(stats['c_img'])))
    print('  len(d_img): {}'.format(len(stats['d_img'])))
    print('  len(actions): {}'.format(len(stats['actions'])))
    print('All done with episode! Saving stats to: {}'.format(save_path))
    with open(save_path, 'wb') as fh:
        pickle.dump(stats, fh)
    return stats
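
# A minimal sketch of the file-based handshake that `run` relies on: a separate
# neural-network process writes one action .txt file per time step into
# C.DVRK_IMG_PATH, and the dVRK-side script polls until a new file appears.
# This mirrors what `U.get_net_results()` is assumed to return (a sorted list
# of .txt paths); the helper below is illustrative, not the actual utility.
import os
import time
from os.path import join

def _poll_for_new_action(img_path, num_seen, sleep_s=1.0):
    """Block until more than `num_seen` action .txt files exist; return them sorted."""
    while True:
        results = sorted([join(img_path, x) for x in os.listdir(img_path)
                          if x.endswith('.txt')])
        if len(results) > num_seen:
            return results
        time.sleep(sleep_s)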
def evaluate(arm, d, args, wrist_map_c2l, wrist_map_c2r, wrist_map_l2r):
    """ Stage 2. Testing stage.

    At a high level, move the dVRK end-effector to various locations and once
    again explicitly record needle tips. Steps:

    1. Start with the dVRK gripping a needle. Try different orientations, to
    ensure that performance is orientation-invariant. This time, we should have
    the suturing phantom there! For now we still have the click interface to
    accurately compute psi.

    2. To get psi, images pop up from the left and right cameras. Press ESC to
    terminate the program. If we want to proceed, then click and drag boxes of
    target locations for the needle tip.

    3. This time, we let the needle move. We do NOT explicitly compute an offset
    vector because we do not know where the dVRK's end-effector goes.
    """
    global CENTERS, image
    which = 'left' if args.arm == 1 else 'right'
    pname = args.path_offsets + '/offsets_{}_arm_data.p'.format(which)
    if which == 'left':
        R = wrist_map_c2l
    else:
        R = wrist_map_c2r
    offsets = U.load_pickle_to_list(pname)
    print("loaded offsets, length: {}".format(len(offsets)))

    # Do stuff here just to grip the needle and get set up for computing phi/psi.
    arm.open_gripper(50)
    name = "Left image for evaluation stage. MOVE DVRK, then press any key"
    U.call_wait_key( cv2.imshow(name, d.left['raw']) )
    arm.close_gripper()
    pos, rot = U.pos_rot_arm(arm, nparrays=True)
    pos[2] += args.z_height
    U.move(arm, pos, rot)
    time.sleep(2)

    # The user now clicks stuff. LEFT CAMERA, then RIGHT CAMERA.
    click_stuff('left')
    click_stuff('right')
    assert len(CENTERS) == 4, "Error, len(CENTERS): {}".format(len(CENTERS))
    assert len(POINTS) == 8, "Error, len(POINTS): {}".format(len(POINTS))

    # New to this method: move the dVRK to a different location.
    print("Not moving to a new spot, for now.")

    # Now we can actually get phi/psi.
    pos_g, rot_g = U.pos_rot_arm(arm, nparrays=True)
    camera_tip  = U.camera_pixels_to_camera_coords(CENTERS[-4], CENTERS[-2], nparrays=True)
    camera_grip = U.camera_pixels_to_camera_coords(CENTERS[-3], CENTERS[-1], nparrays=True)
    ct_h = np.concatenate( (camera_tip,  np.ones(1)) )
    cg_h = np.concatenate( (camera_grip, np.ones(1)) )
    base_t = R.dot(ct_h)
    base_g = R.dot(cg_h)
    camera_dist = np.linalg.norm( camera_tip - camera_grip )

    # Distance based on the click interface (should be same as camera) and based
    # on the dVRK, using the cartesian position function (this will be different).
    # Unfortunately the dVRK one is the easiest to use in real applications.
    base_dist_click = np.linalg.norm( base_t - base_g )
    base_dist_dvrk  = np.linalg.norm( base_t - pos_g )
    assert np.abs(base_dist_click - camera_dist) < 1e-5

    # Compute phi and psi based on the STARTING configuration.
    phi_c, psi_c = compute_phi_and_psi(d=base_dist_click)
    phi_d, psi_d = compute_phi_and_psi(d=base_dist_dvrk)

    # --------------------------------------------------------------------------
    # Compute offset vector wrt base. I don't think the order of subtraction
    # matters as long as in real applications we are consistent with usage, so
    # in application we'd do (pos_n_tip_wrt_base) - (offset).
    #
    # NOTE: this is only the offset vector wrt the starting position that we
    # have, but there are two hypotheses.
    #
    #   1. That we can avoid computing these if we pre-compute them beforehand
    #      (as we have code for), so maybe we don't need a click interface.
    #   2. To get the needle to go somewhere, it's a matter of doing the
    #      arithmetic as specified earlier in the comments!
    #
    # For now we'll compute the two offsets wrt base because we might as well
    # use it to simplify the task (but this is only simplifying the detection of
    # the needle tip and where the dVRK grips it).
    # --------------------------------------------------------------------------
    offset_wrt_base_click = base_t - base_g
    offset_wrt_base_dvrk  = base_t - pos_g
    offset_saved = get_offset(offsets, psi=psi_c, kind='click', which=which, debug=True)
    print("offset_wrt_base_click: {}".format(offset_wrt_base_click))
    print("offset_wrt_base_dvrk:  {}".format(offset_wrt_base_dvrk))
    print("offset_saved:          {}".format(offset_saved))
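
# A minimal sketch of the camera-to-base mapping used in `evaluate` and
# `offsets`: a clicked 3D point in camera coordinates is appended with a 1
# (homogeneous coordinates) and multiplied by the rigid-transform matrix R
# (e.g. `wrist_map_c2l`). The 3x4 shape of R is an assumption inferred from the
# usage above, where R.dot([x, y, z, 1]) is compared against a 3D arm position.
import numpy as np

def camera_to_base(point_cam, R):
    """Map a 3D camera-frame point to the arm's base frame via a 3x4 transform."""
    point_cam = np.asarray(point_cam, dtype=np.float64)
    assert point_cam.shape == (3,) and R.shape == (3, 4)
    return R.dot(np.concatenate((point_cam, np.ones(1))))

# e.g. base_tip = camera_to_base(camera_tip, wrist_map_c2l)
#      offset   = base_tip - camera_to_base(camera_grip, wrist_map_c2l)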
def offsets(arm, d, args, wrist_map_c2l, wrist_map_c2r, wrist_map_l2r):
    """ Stage 1.

    1. Before running the python script, get the setup as shown in the image in
    the README, where the needle is on the foam and the dVRK (w/SNAP) is about
    to grip it.

    2. Then, this code waits until the user has manipulated the master tools to
    pick up the needle at some location. EDIT: actually it's easier to just move
    and then assume we translate up by a few mm. I'm getting a lack of responses
    from the master tools, for some reason. So just move it directly and
    explicitly.

    3. Images will pop up. Press ESC to terminate the program. Otherwise, we
    click and crop: (a) the end-point of the needle AND (b) the place where the
    dVRK grasps the needle. The latter is mainly to see if this coincides with
    the dVRK's actual position. MUST BE DONE IN ORDER!

    4. This saves into a pickle file by appending, so in theory we continually
    add data points. (The main catch is to check that if we change the setup,
    we're NOT counting those older points.) Repeat the process by moving the
    master tools again, and go to step 2.
    """
    global CENTERS, image
    which = 'left' if args.arm == 1 else 'right'
    pname = args.path_offsets + '/offsets_{}_arm_data.p'.format(which)
    if which == 'left':
        R = wrist_map_c2l
    else:
        R = wrist_map_c2r
    num_offsets = 0

    while True:
        # Just an overall view before grabbing the needle. OK to only see the left
        # image. And BTW, this is when we change the dVRK to get more data.
        arm.open_gripper(50)
        print("\nNow starting new offset with {} offsets so far...".format(num_offsets))
        name = "Left image w/{} offsets so far. MOVE DVRK, press any key".format(num_offsets)
        U.call_wait_key( cv2.imshow(name, d.left['raw']) )
        arm.close_gripper()
        pos, rot = U.pos_rot_arm(arm, nparrays=True)
        pos[2] += args.z_height
        U.move(arm, pos, rot)
        time.sleep(2)

        # The user now clicks stuff. LEFT CAMERA, then RIGHT CAMERA.
        click_stuff('left')
        click_stuff('right')

        # Compute data to store. We stored left tip, left grip, right tip, right grip.
        pos_g, rot_g = U.pos_rot_arm(arm, nparrays=True)
        assert len(CENTERS) % 4 == 0, "Error, len(CENTERS): {}".format(len(CENTERS))
        camera_tip  = U.camera_pixels_to_camera_coords(CENTERS[-4], CENTERS[-2], nparrays=True)
        camera_grip = U.camera_pixels_to_camera_coords(CENTERS[-3], CENTERS[-1], nparrays=True)

        # Map stuff to stuff. The `h` in `xyz_h` refers to homogeneous coordinates.
        ct_h = np.concatenate( (camera_tip,  np.ones(1)) )
        cg_h = np.concatenate( (camera_grip, np.ones(1)) )
        base_t = R.dot(ct_h)
        base_g = R.dot(cg_h)
        camera_dist = np.linalg.norm( camera_tip - camera_grip )

        # Distance based on the click interface (should be same as camera) and based
        # on the dVRK, using the cartesian position function (this will be different).
        # Unfortunately the dVRK one is the easiest to use in real applications.
        base_dist_click = np.linalg.norm( base_t - base_g )
        base_dist_dvrk  = np.linalg.norm( base_t - pos_g )
        assert np.abs(base_dist_click - camera_dist) < 1e-5

        # Compute offset vector wrt base frame. I don't think the order of
        # subtraction matters as long as in real applications we are consistent
        # with usage, so in application we'd do (pos_n_tip_wrt_base) - (offset).
        offset_wrt_base_click = base_t - base_g
        offset_wrt_base_dvrk  = base_t - pos_g
        phi_c, psi_c = compute_phi_and_psi(d=base_dist_click)
        phi_d, psi_d = compute_phi_and_psi(d=base_dist_dvrk)

        # Bells and whistles.
        base = 'base_' + which + '_'
        info = {}
        info['pos_g_dvrk'] = pos_g
        info['rot_g_dvrk'] = rot_g
        info['camera_tip'] = camera_tip
        info['camera_grip'] = camera_grip
        info[base+'tip'] = base_t
        info[base+'grip'] = base_g
        info['camera_dist'] = camera_dist
        info[base+'dist_click'] = base_dist_click
        info[base+'dist_dvrk'] = base_dist_dvrk
        info[base+'offset_click'] = offset_wrt_base_click
        info[base+'offset_dvrk'] = offset_wrt_base_dvrk
        info['phi_click_deg'] = phi_c
        info['psi_click_mm'] = psi_c
        info['phi_dvrk_deg'] = phi_d
        info['psi_dvrk_mm'] = psi_d
        num_offsets += 1
        U.store_pickle(fname=pname, info=info, mode='a')
        num_before_this = U.get_len_of_pickle(pname)

        print("Computed and saved {} offset vectors in this session".format(num_offsets))
        print("We have {} items total (including prior sessions)".format(num_before_this))
        print("  pos: {}\n  rot: {}".format(pos_g, rot_g))
        print("  for tip,  CENTER coords (left,right): {}, {}".format(CENTERS[-4], CENTERS[-2]))
        print("  for grip, CENTER coords (left,right): {}, {}".format(CENTERS[-3], CENTERS[-1]))
        print("  camera_tip:  {}".format(camera_tip))
        print("  camera_grip: {}".format(camera_grip))
        print("  base_{}_tip:  {}".format(which, base_t))
        print("  base_{}_grip: {}".format(which, base_g))
        print("  camera_dist (mm):      {:.2f}".format(camera_dist*1000.))
        print("  base_dist_camera (mm): {:.2f}".format(base_dist_click*1000.))
        print("  base_dist_dvrk (mm):   {:.2f}".format(base_dist_dvrk*1000.))
        print("  camera, phi: {:.2f}, psi: {:.2f}".format(phi_c, psi_c))
        print("  base,   phi: {:.2f}, psi: {:.2f}".format(phi_d, psi_d))
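
# A minimal sketch of how the per-grasp records appended by `offsets` could be
# consumed later (the real lookup is `get_offset`, which is not shown in this
# file excerpt): load the list of info dicts and pick the saved click-based
# offset whose psi is closest to the current psi. Key names follow the `info`
# dictionary above; the helper name and selection rule are assumptions.
import numpy as np

def nearest_saved_offset(offsets_list, psi, which='left'):
    """Return the stored click-based offset whose 'psi_click_mm' is closest to `psi`."""
    key = 'base_' + which + '_offset_click'
    best = min(offsets_list, key=lambda info: abs(info['psi_click_mm'] - psi))
    return np.asarray(best[key])

# Example:
#   offsets_list = U.load_pickle_to_list(pname)
#   offset = nearest_saved_offset(offsets_list, psi=psi_c, which=which)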
def collect_guidelines(args, arm, d):
    """ Gather statistics about the workspace on how safe we can set things.
    Save things in a pickle file specified by the `directory` parameter. Click
    the ESC key to exit the program and restart if I've made an error.

    BTW, the four poses we collect will be the four "home" positions that I use
    later, though with more z-coordinate offset.

    Some information:

        `yaw` must be limited in [-180,180]  # But actually, [-90,90] is OK.
        `pitch` must be limited in [-50,50]  # I _think_ ...
        `roll` must be limited in [-180,180] # I think ...

    Remember, if I change the numbers, it doesn't impact the code until
    re-building `guidelines.p`!!
    """
    # Add stuff we should already know, particularly the rotation ranges.
    info = {}
    info['min_yaw'] = 30
    info['max_yaw'] = 150
    info['min_pitch'] = 40
    info['max_pitch'] = 80
    # Roll is annoying because of the values the dVRK provides.
    info['roll_neg_ubound'] = -140  # (-180, roll_neg_ubound)
    info['roll_pos_lbound'] = 140   # (roll_pos_lbound, 180)
    info['min_pos_roll'] = 40
    info['max_pos_roll'] = 180
    info['min_neg_roll'] = -180
    info['max_neg_roll'] = -150

    # Move arm to positions to determine approximately safe ranges for x,y,z
    # values. All the `pos_{lr,ll,ul,ur}` are in robot coordinates.
    U.call_wait_key(cv2.imshow("Left camera (move to lower right corner now!)", d.left['raw']))
    info['pos_lr'], info['rot_lr'] = U.pos_rot_arm(arm)
    U.call_wait_key(cv2.imshow("Left camera (move to lower left corner now!)", d.left['raw']))
    info['pos_ll'], info['rot_ll'] = U.pos_rot_arm(arm)
    U.call_wait_key(cv2.imshow("Left camera (move to upper left corner now!)", d.left['raw']))
    info['pos_ul'], info['rot_ul'] = U.pos_rot_arm(arm)
    U.call_wait_key(cv2.imshow("Left camera (move to upper right corner now!)", d.left['raw']))
    info['pos_ur'], info['rot_ur'] = U.pos_rot_arm(arm)

    # So P[:,0] is a vector of the x's, P[:,1] vector of y's, P[:,2] vector of z's.
    p_lr = np.squeeze(np.array(info['pos_lr']))
    p_ll = np.squeeze(np.array(info['pos_ll']))
    p_ul = np.squeeze(np.array(info['pos_ul']))
    p_ur = np.squeeze(np.array(info['pos_ur']))
    P = np.vstack((p_lr, p_ll, p_ul, p_ur))

    # Get ranges. This is a bit of a heuristic but generally good I think.
    info['min_x'] = np.min([p_lr[0], p_ll[0], p_ul[0], p_ur[0]])
    info['max_x'] = np.max([p_lr[0], p_ll[0], p_ul[0], p_ur[0]])
    info['min_y'] = np.min([p_lr[1], p_ll[1], p_ul[1], p_ur[1]])
    info['max_y'] = np.max([p_lr[1], p_ll[1], p_ul[1], p_ur[1]])

    # For z, we fit a plane. See https://stackoverflow.com/a/1400338/3287820
    # Find (alpha, beta, gamma) s.t. f(x,y) = alpha*x + beta*y + gamma = z.
    A = np.zeros((3, 3))  # Must be symmetric!
    A[0, 0] = np.sum(P[:, 0] * P[:, 0])
    A[0, 1] = np.sum(P[:, 0] * P[:, 1])
    A[0, 2] = np.sum(P[:, 0])
    A[1, 0] = np.sum(P[:, 0] * P[:, 1])
    A[1, 1] = np.sum(P[:, 1] * P[:, 1])
    A[1, 2] = np.sum(P[:, 1])
    A[2, 0] = np.sum(P[:, 0])
    A[2, 1] = np.sum(P[:, 1])
    A[2, 2] = P.shape[0]
    b = np.array([
        np.sum(P[:, 0] * P[:, 2]),
        np.sum(P[:, 1] * P[:, 2]),
        np.sum(P[:, 2])
    ])
    x = np.linalg.inv(A).dot(b)
    info['z_alpha'] = x[0]
    info['z_beta'] = x[1]
    info['z_gamma'] = x[2]

    # Sanity checks before saving stuff.
    assert info['min_x'] < info['max_x']
    assert info['min_y'] < info['max_y']
    assert P.shape == (4, 3)
    print("\nThe key/val pairings for {}.".format(args.directory))
    keys = sorted(info.keys())
    for key in keys:
        print("{:20} ==> {}".format(key, info[key]))
    print("")
    print("P:\n{}".format(P))
    print("A:\n{}".format(A))
    print("x:\n{}".format(x))
    print("b:\n{}".format(b))
    U.store_pickle(fname=args.directory, info=info, mode='w')
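
# A minimal sketch of the plane fit performed in `collect_guidelines` (and in
# `collect_boundaries` below), written with np.linalg.lstsq instead of forming
# the normal equations by hand. Both solve the same least-squares problem
# f(x, y) = alpha*x + beta*y + gamma ~= z over the four corner positions, so
# this version can be used to cross-check z_alpha, z_beta, z_gamma.
import numpy as np

def fit_plane_lstsq(P):
    """P is (N,3) with rows (x, y, z); returns (alpha, beta, gamma)."""
    A = np.column_stack((P[:, 0], P[:, 1], np.ones(len(P))))
    coeffs, _, _, _ = np.linalg.lstsq(A, P[:, 2], rcond=None)
    return coeffs  # alpha, beta, gamma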
def run(args, cam, p):
    """Run one episode, record statistics, etc."""
    stats = defaultdict(list)
    COVERAGE_SUCCESS = 0.92
    exponent = 0

    for i in range(args.max_ep_length):
        print('\n*************************************')
        print('ON TIME STEP (I.E., ACTION) NUMBER {}'.format(i + 1))
        print('*************************************\n')

        # ----------------------------------------------------------------------
        # STEP 1: query the image from the camera class using `cam`. To avoid
        # the flashing strobe light, you have to move to the tab with the camera.
        # ----------------------------------------------------------------------
        c_img_raw = None
        d_img_raw = None
        print('Waiting for c_img & d_img; please press ENTER in the appropriate tab')
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()
        print('  obtained the (raw) c_img and d_img')

        # ----------------------------------------------------------------------
        # STEP 2: process images and save as a 100x100 png, see `camera.py` for
        # some tests. The image must be saved in the specified DVRK_IMG_PATH for
        # the net to see it. Also, if coverage is high enough, EXIT NOW!
        # ----------------------------------------------------------------------
        c_img, d_img = _process_images(c_img_raw, d_img_raw, args)
        assert c_img.shape == (100, 100, 3), c_img.shape
        assert d_img.shape == (100, 100, 3), d_img.shape
        if args.use_color:
            c_tail = "c_img_{}.png".format(str(i).zfill(2))
            img_path = join(C.DVRK_IMG_PATH, c_tail)
            cv2.imwrite(img_path, c_img)
        else:
            d_tail = "d_img_{}.png".format(str(i).zfill(2))
            img_path = join(C.DVRK_IMG_PATH, d_tail)
            cv2.imwrite(img_path, d_img)
        print('just saved to: {}\n'.format(img_path))
        U.single_means(c_img, depth=False)
        U.single_means(d_img, depth=True)
        coverage = U.calculate_coverage(c_img)

        # Ensures we save the final image in case we exit with high coverage.
        # Make sure it happens BEFORE the `break` below so we get final images.
        stats['coverage'].append(coverage)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)

        if coverage > COVERAGE_SUCCESS:
            print('\nCOVERAGE SUCCESS: {:.3f} > {:.3f}, exiting ...\n'.format(
                    coverage, COVERAGE_SUCCESS))
            break
        else:
            print('\ncurrent coverage: {:.3f}\n'.format(coverage))
        print('  now wait a few seconds for network to run')
        time.sleep(5)

        # ----------------------------------------------------------------------
        # STEP 3: get the output from the neural network loading class (you did
        # run it in a separate terminal tab, right?) and then show it to a human.
        # HUGE ASSUMPTION: that the last text file indicates the action we want.
        # ----------------------------------------------------------------------
        dvrk_action_paths = sorted(
                [join(C.DVRK_IMG_PATH, x) for x in os.listdir(C.DVRK_IMG_PATH)
                 if x[-4:] == '.txt'])
        assert len(dvrk_action_paths) > 0, 'Did you run the neural net code??'
        action = np.loadtxt(dvrk_action_paths[-1])
        print('neural net says: {}'.format(action))
        stats['actions'].append(action)

        # ----------------------------------------------------------------------
        # STEP 3.5: only if we're not on the first action, if the current image
        # is too similar to the old one, move the target points closer towards
        # the center of the cloth plane. An approximation but likely 'good
        # enough'. It does assume the net would predict a similar action, though ...
        # ----------------------------------------------------------------------
        if i > 0:
            # AH! Go to -2 because I modified code to append (c_img,d_img) above.
            prev_c = stats['c_img'][-2]
            prev_d = stats['d_img'][-2]
            diff_l2_c = np.linalg.norm(c_img - prev_c) / np.prod(c_img.shape)
            diff_l2_d = np.linalg.norm(d_img - prev_d) / np.prod(d_img.shape)
            diff_ss_c = compare_ssim(c_img, prev_c, multichannel=True)
            diff_ss_d = compare_ssim(d_img[:, :, 0], prev_d[:, :, 0])
            print('\n  (c) diff L2: {:.3f}'.format(diff_l2_c))
            print('  (d) diff L2: {:.3f}'.format(diff_l2_d))
            print('  (c) diff SS: {:.3f}'.format(diff_ss_c))
            print('  (d) diff SS: {:.3f}\n'.format(diff_ss_d))
            stats['diff_l2_c'].append(diff_l2_c)
            stats['diff_l2_d'].append(diff_l2_d)
            stats['diff_ss_c'].append(diff_ss_c)
            stats['diff_ss_d'].append(diff_ss_d)

            # Apply action 'compression'? A 0.95 cutoff empirically works well.
            ss_thresh = 0.95
            if diff_ss_c > ss_thresh:
                exponent += 1
                print('NOTE structural similarity exceeds {}'.format(ss_thresh))
                action[0] = action[0] * (0.9 ** exponent)
                action[1] = action[1] * (0.9 ** exponent)
                print('revised action after \'compression\': {} w/exponent {}'.format(
                        action, exponent))
            else:
                exponent = 0

        # ----------------------------------------------------------------------
        # STEP 4. If the output would result in a dangerous position, the human
        # stops it by hitting the ESC key. Otherwise, press any other key to
        # continue. The human should NOT normally be using this !!
        # ----------------------------------------------------------------------
        title = '{} -- ESC TO CANCEL (Or if episode done)'.format(action)
        if args.use_color:
            exit = U.call_wait_key(cv2.imshow(title, c_img))
        else:
            exit = U.call_wait_key(cv2.imshow(title, d_img))
        cv2.destroyAllWindows()
        if exit:
            print('Warning: why are we exiting here?')
            print('It should exit naturally due to (a) coverage or (b) time limits.')
            break

        # ----------------------------------------------------------------------
        # STEP 5: Watch the robot do its action. Terminate the script if the
        # resulting action makes things fail spectacularly.
        # ----------------------------------------------------------------------
        x = action[0]
        y = action[1]
        dx = action[2]
        dy = action[3]
        U.move_p_from_net_output(x, y, dx, dy,
                                 row_board=C.ROW_BOARD,
                                 col_board=C.COL_BOARD,
                                 data_square=C.DATA_SQUARE,
                                 p=p)

        # ----------------------------------------------------------------------
        # STEP 6. Record statistics. Sleep just in case, also reset images.
        # Don't save raw images -- causes file sizes to blow up.
        # ----------------------------------------------------------------------
        cam.set_color_none()
        cam.set_depth_none()
        print('Reset color/depth in camera class, waiting a few seconds ...')
        time.sleep(3)

    # If we ended up using all actions above, we really need one more image.
    if len(stats['c_img']) == args.max_ep_length:
        assert len(stats['coverage']) == args.max_ep_length, len(stats['coverage'])
        c_img_raw = None
        d_img_raw = None
        print('Waiting for FINAL c_img & d_img; please press ENTER in the appropriate tab')
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()
        c_img, d_img = _process_images(c_img_raw, d_img_raw, args)
        coverage = U.calculate_coverage(c_img)
        stats['coverage'].append(coverage)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)
        print('(for full length episode) final coverage: {:.3f}'.format(coverage))

    # Final book-keeping and return statistics.
    print('\nEPISODE DONE!')
    print('  coverage: {}'.format(stats['coverage']))
    print('  len(coverage): {}'.format(len(stats['coverage'])))
    print('  len(c_img): {}'.format(len(stats['c_img'])))
    print('  len(d_img): {}'.format(len(stats['d_img'])))
    print('  len(actions): {}'.format(len(stats['actions'])))

    # File path shenanigans.
    if args.use_color:
        if args.use_other_color:
            save_path = join('results', 'tier{}_color_yellowcloth'.format(args.tier))
        else:
            save_path = join('results', 'tier{}_color'.format(args.tier))
    else:
        if args.use_other_color:
            save_path = join('results', 'tier{}_depth_yellowcloth'.format(args.tier))
        else:
            save_path = join('results', 'tier{}_depth'.format(args.tier))
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    count = len([x for x in os.listdir(save_path) if 'ep_' in x and '.pkl' in x])
    save_path = join(save_path, 'ep_{}_{}.pkl'.format(str(count).zfill(3), U.get_date()))
    print('All done with episode! Saving stats to: {}'.format(save_path))
    with open(save_path, 'wb') as fh:
        pickle.dump(stats, fh)
    return stats
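
# A minimal sketch of the action 'compression' heuristic used in STEP 3.5
# above, pulled out as a standalone function: each consecutive time step whose
# color image is nearly identical to the previous one (SSIM above the cutoff)
# shrinks the pick point by another factor of 0.9, pulling it toward the origin
# of the (assumed centered) action coordinates. The function name is
# illustrative; the script applies this logic inline on a numpy action array.
def compress_action(action, exponent, decay=0.9):
    """Scale the (x, y) pick point of `action` = [x, y, dx, dy] by decay**exponent."""
    action = list(action)
    action[0] *= decay ** exponent
    action[1] *= decay ** exponent
    return action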
def collect_boundaries(p, cam, data_path):
    """Gather statistics about the workspace on how safe we can set things.
    Save things in a pickle file specified by the `data_path` parameter. Press
    the ESC key to exit the program and restart.

    BTW, the four poses we collect will be the four "home" positions that I use
    later, though with more z-coordinate offset.

    Some information:

        `yaw` must be limited in [-180,180]  # But [-90,90] is all we need.
        `pitch` must be limited in [-50,50]  # I _think_ ...
        `roll` must be limited in [-180,180] # I think ...

    Remember, if I change the numbers, it doesn't impact the code until
    re-building `guidelines.p`!!
    """
    # First, add stuff that we should already know. (TODO: IS THIS STILL TRUE??)
    info = {}
    info['min_yaw'] = -90
    info['max_yaw'] = 90
    info['min_pitch'] = -15
    info['max_pitch'] = 25
    info['roll_neg_ubound'] = -150  # (-180, roll_neg_ubound)
    info['roll_pos_lbound'] = 150   # (roll_pos_lbound, 180)
    info['min_roll'] = -180
    info['max_roll'] = -150

    # Be careful about this ordering! Orientation is wrt us looking at the
    # workspace from behind the computers, so (for example) 'lower' means closer to us.
    ordering = ['lower_right', 'lower_left', 'upper_left', 'upper_right']
    boundary_poses = []

    for order in ordering:
        # Get images, refresh to None each time we need to move.
        c_img_raw = None
        d_img_raw = None
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()
        # Move arm to positions to determine approx. safe ranges for x,y,z values.
        txt = "Move to {} corner now!!".format(order)
        U.call_wait_key(cv2.imshow(txt, c_img_raw))
        pose = p.arm.get_current_pose(unit='rad')
        boundary_poses.append(pose)
        cv2.destroyAllWindows()
    bp = boundary_poses

    # Save these so that we can use them for `home` positions. These are POSES,
    # so they are TUPLES of (pos, rot).
    info['home_LR_pose'] = bp[0]
    info['home_LL_pose'] = bp[1]
    info['home_UL_pose'] = bp[2]
    info['home_UR_pose'] = bp[3]

    # So P[:,0] is a vector of the x's, P[:,1] vector of y's, P[:,2] vector of z's.
    # For the k-th item, bp[k][0] = its position, and bp[k][1] = orientation.
    p_lr = np.squeeze(np.array((bp[0])[0][:3]))
    p_ll = np.squeeze(np.array((bp[1])[0][:3]))
    p_ul = np.squeeze(np.array((bp[2])[0][:3]))
    p_ur = np.squeeze(np.array((bp[3])[0][:3]))
    P = np.vstack((p_lr, p_ll, p_ul, p_ur))
    print("P (shape: {}):\n{}".format(P.shape, P))

    # Get ranges. This is a bit of a heuristic, just taking min/max of the four.
    # Not sure how good this will be? TODO: check this, I wanted to get a rough
    # sense of the max and min coord ranges we should enforce.
    #info['min_x'] = np.min( [p_lr[0], p_ll[0], p_ul[0], p_ur[0]] )
    #info['max_x'] = np.max( [p_lr[0], p_ll[0], p_ul[0], p_ur[0]] )
    #info['min_y'] = np.min( [p_lr[1], p_ll[1], p_ul[1], p_ur[1]] )
    #info['max_y'] = np.max( [p_lr[1], p_ll[1], p_ul[1], p_ur[1]] )
    #assert info['min_x'] < info['max_x']
    #assert info['min_y'] < info['max_y']

    # For z, we fit a plane. See https://stackoverflow.com/a/1400338/3287820
    # Find (alpha, beta, gamma) s.t. f(x,y) = alpha*x + beta*y + gamma = z.
    A = np.zeros((3, 3))  # Must be symmetric!
    A[0, 0] = np.sum(P[:, 0] * P[:, 0])
    A[0, 1] = np.sum(P[:, 0] * P[:, 1])
    A[0, 2] = np.sum(P[:, 0])
    A[1, 0] = np.sum(P[:, 0] * P[:, 1])
    A[1, 1] = np.sum(P[:, 1] * P[:, 1])
    A[1, 2] = np.sum(P[:, 1])
    A[2, 0] = np.sum(P[:, 0])
    A[2, 1] = np.sum(P[:, 1])
    A[2, 2] = P.shape[0]
    b = np.array([
        np.sum(P[:, 0] * P[:, 2]),
        np.sum(P[:, 1] * P[:, 2]),
        np.sum(P[:, 2])
    ])
    x = np.linalg.inv(A).dot(b)
    info['z_alpha'] = x[0]
    info['z_beta'] = x[1]
    info['z_gamma'] = x[2]

    # Sanity checks before saving stuff.
    assert P.shape == (4, 3)
    print("A:\n{}".format(A))
    print("P:\n{}".format(P))
    print("x:\n{}".format(x))
    print("b:\n{}\n".format(b))

    # Log output and save! Json is a bit inconvenient, so use pickle.
    for key in sorted(info.keys()):
        print('\n{}'.format(key))
        print(info[key])
    with open(data_path, 'wb') as fh:
        pickle.dump(info, fh)
    print('Just saved: {}'.format(data_path))
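
# A minimal sketch (assumed helper) showing how the fitted plane coefficients
# saved by `collect_boundaries` could be used downstream: given an (x, y)
# target inside the workspace, evaluate z = alpha*x + beta*y + gamma to
# estimate the surface height at that point, e.g. before adding a safety
# offset. Reading the pickle back follows the same `data_path` convention as above.
import pickle

def workspace_plane_z(info, x, y):
    """Evaluate the fitted workspace plane at (x, y)."""
    return info['z_alpha'] * x + info['z_beta'] * y + info['z_gamma']

# Example:
#   with open(data_path, 'rb') as fh:
#       info = pickle.load(fh)
#   z_surface = workspace_plane_z(info, x=0.05, y=0.02)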