def inspect(roll_path, fighead):
    for idx, roll in enumerate(roll_path):
        print("On: {}".format(roll))
        with open(roll, 'r') as f:
            data = pickle.load(f)
        rgb = data[0]['c_img']
        depth = depth_to_net_dim(data[0]['d_img'], robot='HSR')
        f_rgb = '{}_rgb.png'.format(idx)
        f_depth = '{}_depth.png'.format(idx)
        out1 = join(fighead, f_rgb)
        out2 = join(fighead, f_depth)
        cv2.imwrite(out1, rgb)
        cv2.imwrite(out2, depth)

        # also do the next one ...
        rgb = data[-2]['c_img']
        depth = depth_to_net_dim(data[-2]['d_img'], robot='HSR')
        f_rgb = '{}_next_rgb.png'.format(idx)
        f_depth = '{}_next_depth.png'.format(idx)
        out1 = join(fighead, f_rgb)
        out2 = join(fighead, f_depth)
        cv2.imwrite(out1, rgb)
        cv2.imwrite(out2, depth)
        print("")
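# Hypothetical usage sketch of `inspect` (the paths below are made up; this
# assumes `glob` is imported, plus `pickle`, `cv2`, `join`, and
# `depth_to_net_dim` as in the rest of this file):
#   rollouts = sorted(glob.glob('/path/to/rollouts/rollout_*.p'))
#   inspect(rollouts, fighead='/path/to/figures')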
def _test_success(self):
    """Simple tests for success net. Don't forget to process depth images.

    Should be done after a grasp test since I don't re-position ...

    Note: we have access to `self.sn`, but that isn't the actual net with a
    `predict`; it's a wrapper (explained above). We can access the true
    network via `self.sn.sdect` and from there call `predict`.
    """
    print("\nNow in `test_success` to check success net...")
    time.sleep(3)
    c_img = self.cam.read_color_data()
    d_img = self.cam.read_depth_data()
    if np.isnan(np.sum(d_img)):
        cv2.patchNaNs(d_img, 0.0)
    d_img = depth_to_net_dim(d_img, robot='HSR')
    result = self.sn.sdect.predict(np.copy(d_img))
    result = np.squeeze(result)
    print("s-net pred: {} (if [0]<[1] failure, else success...)".format(result))
    caption = 'S Predicted: {} (ESC to abort, other key to proceed)'.format(result)
    cv2.imshow(caption, d_img)
    key = cv2.waitKey(0)
    if key in ESC_KEYS:
        print("Pressed ESC key. Terminating program...")
        sys.exit()
def _test_grasp(self):
    """Simple tests for grasping. Don't forget to process depth images.

    Do this independently of any rollout ...
    """
    print("\nNow in `test_grasp` to check grasping net...")
    self.position_head()
    time.sleep(3)
    c_img = self.cam.read_color_data()
    d_img = self.cam.read_depth_data()
    if np.isnan(np.sum(d_img)):
        cv2.patchNaNs(d_img, 0.0)
    d_img = depth_to_net_dim(d_img, robot='HSR')
    pred = self.g_detector.predict(np.copy(d_img))
    img = self.dp.draw_prediction(d_img, pred)
    print("prediction: {}".format(pred))
    caption = 'G Predicted: {} (ESC to abort, other key to proceed)'.format(pred)
    cv2.imshow(caption, img)
    key = cv2.waitKey(0)
    if key in ESC_KEYS:
        print("Pressed ESC key. Terminating program...")
        sys.exit()
def save_image(self, c_img, d_img, success_class=None):
    """Save images. Don't forget to process depth images.

    For now I'm using a tuned cutoff like 1400, at least to _visualize_.

    NOTE: since the cutoff for turning depth images into black may change,
    it would be better to save the original d_img in a dictionary. Don't
    use cv2.imwrite() as I know from experience that it won't work as
    desired.
    """
    rc = str(self.r_count).zfill(3)
    f_rc_grasp = 'frame_{}_{}.png'.format(rc, str(self.grasp_count).zfill(2))
    f_rc_success = 'frame_{}_{}_class_{}.png'.format(
            rc, str(self.success_count).zfill(2), success_class)
    if np.isnan(np.sum(d_img)):
        cv2.patchNaNs(d_img, 0.0)
    d_img = depth_to_net_dim(d_img, cutoff=1400)  # for visualization only

    if self.side == "BOTTOM":
        if self.grasp:
            pth1 = join(FAST_PATH, 'b_grasp', 'rgb_' + f_rc_grasp)
            pth2 = join(FAST_PATH, 'b_grasp', 'depth_' + f_rc_grasp)
        else:
            pth1 = join(FAST_PATH, 'b_success', 'rgb_' + f_rc_success)
            pth2 = join(FAST_PATH, 'b_success', 'depth_' + f_rc_success)
    else:
        if self.grasp:
            pth1 = join(FAST_PATH, 't_grasp', 'rgb_' + f_rc_grasp)
            pth2 = join(FAST_PATH, 't_grasp', 'depth_' + f_rc_grasp)
        else:
            pth1 = join(FAST_PATH, 't_success', 'rgb_' + f_rc_success)
            pth2 = join(FAST_PATH, 't_success', 'depth_' + f_rc_success)
    cv2.imwrite(pth1, c_img)
    cv2.imwrite(pth2, d_img)
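# A minimal sketch of the docstring's suggestion above (the helper name and
# file naming are hypothetical, not part of the original class): pickle the
# *raw* depth array next to the PNGs so the 1400 cutoff can be re-tuned later
# without re-collecting data. Assumes `pickle` and `np` are imported as
# elsewhere in this file.
def save_raw_depth(d_img_raw, out_path):
    raw = {'d_img_raw': np.copy(d_img_raw), 'cutoff_used': 1400}
    with open(out_path, 'w') as f:
        pickle.dump(raw, f)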
def shared_code(self, use_d, old_grasp_image):
    """Shared code for calling the success network.

    For the timing, avoid counting the time for processing images.
    Returns a dictionary with a bunch of info for later.
    """
    self.whole_body.move_to_joint_positions({'arm_flex_joint': -np.pi / 16.0})
    self.whole_body.move_to_joint_positions({'head_pan_joint': np.pi / 2.0})
    self.whole_body.move_to_joint_positions({'arm_lift_joint': 0.120})
    self.whole_body.move_to_joint_positions({'head_tilt_joint': -np.pi / 4.0})  # -np.pi/36.0})

    # Retrieve and (if necessary) process images.
    time.sleep(3)
    c_img = self.cam.read_color_data()
    d_img = self.cam.read_depth_data()
    d_img_raw = np.copy(d_img)
    if use_d:
        d_img = depth_to_net_dim(d_img, robot='HSR')
        img = np.copy(d_img)
    else:
        img = np.copy(c_img)

    # Call network and record time.
    stranst = time.time()
    data = self.sdect.predict(img)
    etranst = time.time()
    s_predict_t = etranst - stranst
    print("\nSuccess predict time: {:.2f}".format(s_predict_t))
    print("Success net output (i.e., logits): {}\n".format(data))

    # The success net outputs a 2D result for a probability vector.
    ans = np.argmax(data)
    success = (ans == 0)

    # NEW! Can also tell us the difference between grasp and success imgs.
    diff = np.linalg.norm(old_grasp_image - img)
    score = compare_ssim(old_grasp_image[:, :, 0], img[:, :, 0])

    result = {
        'success': success,
        'data': data,
        'c_img': c_img,
        'd_img': d_img,
        'd_img_raw': d_img_raw,
        's_predict_t': s_predict_t,
        'diff_l2': diff,
        'diff_ssim': score,
    }
    return result
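# For reference, `shared_code` above argmaxes the raw logits, with index 0
# meaning success (matching the "(if [0]<[1] failure, else success)" print in
# the test code). If calibrated probabilities are ever wanted for logging, a
# minimal softmax sketch (hypothetical helper, not part of the original
# pipeline; assumes `np` as elsewhere in this file):
def logits_to_probs(logits):
    z = np.squeeze(np.array(logits, dtype=np.float64))
    z = z - np.max(z)  # subtract the max for numerical stability
    e = np.exp(z)
    return e / np.sum(e)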
def save_imgs(RES, pref):
    """I'm thinking we want to check the grasp img w/the subsequent success img.
    """
    diffs = []
    ssims = []

    for ridx, res in enumerate(RES):
        fidx = 0
        with open(res, 'r') as f:
            data = pickle.load(f)
        num_grasp = int(len(data) / 2.0)
        print(res, num_grasp)
        previous_grasp_img = None

        for item in data:
            if 'type' not in item:
                continue
            dimg = item['d_img']
            if np.isnan(np.sum(dimg)):
                cv2.patchNaNs(dimg, 0.0)
            dimg = depth_to_net_dim(dimg, robot='HSR')

            # Record current grasp img so we can compare w/next success net img.
            which_net = item['type'].lower()
            if which_net == 'grasp':
                previous_grasp_img = dimg
            else:
                diff = np.linalg.norm(previous_grasp_img - dimg)
                diffs.append(diff)
                gray1 = previous_grasp_img[:, :, 0]
                gray2 = dimg[:, :, 0]
                assert gray1.shape == gray2.shape == (480, 640)
                # We can optionally return other stuff:
                # http://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.compare_ssim
                #(score, difference) = compare_ssim(gray1, gray2)
                score = compare_ssim(gray1, gray2)
                ssims.append(score)

            # Save image, put L2 in success net img name.
            side = item['side'].lower()
            fname = '{}_roll_{}_side_{}_idx_{}_{}.png'.format(
                    pref, ridx, side, fidx, which_net)
            fname = join(TEST_HEAD, fname)
            if which_net == 'success':
                fname = fname.replace('.png', '_{:.3f}_{:.3f}.png'.format(diff, score))
            fidx += 1
            cv2.imwrite(fname, dimg)

    return diffs, ssims
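# As the scikit-image docs linked above note, `compare_ssim` can also return
# the full per-pixel SSIM map via `full=True`. A minimal sketch of saving
# that map for inspection (the helper name and output path are hypothetical):
def save_ssim_map(gray1, gray2, out_path):
    score, ssim_map = compare_ssim(gray1, gray2, full=True)
    # Per-pixel SSIM lies in [-1, 1]; rescale to [0, 255] for a viewable PNG.
    cv2.imwrite(out_path, ((ssim_map + 1.0) * 127.5).astype(np.uint8))
    return score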
def get_label(self):
    if self.image is None:
        self.current_image = self.cam.read_color_data()
        tmp_depth = self.cam.read_depth_data()
    else:
        self.current_image = self.image

    if False:
        # ----------------------------------------------------------------------
        # Temporary debugging to get viewpoints to align w/H's data, as needed.
        # Will save the image(s) based on what the robot sees during the click interface.
        # For the depth cutoff, note that the HSR uses *millimeters*.
        # ----------------------------------------------------------------------
        kk = len([x for x in os.listdir('imgs/') if 'view_close_rgb' in x])
        img_name = 'imgs/view_close_rgb_{}.png'.format(str(kk).zfill(2))
        cv2.imwrite(img_name, self.current_image)
        kk = len([x for x in os.listdir('imgs/') if 'view_close_depth' in x])
        img_name = 'imgs/view_close_depth_{}.png'.format(str(kk).zfill(2))
        # Save tmp_depth to look at histograms later, etc.
        numpy_name = 'temp_depth_{}.txt'.format(kk)
        np.savetxt(numpy_name, tmp_depth)
        print("just saved {}".format(numpy_name))
        d_img = depth_to_net_dim(tmp_depth, cutoff=1250)
        cv2.imwrite(img_name, d_img)
        # ----------------------------------------------------------------------

    self.tkimg = ImageTk.PhotoImage(Image.fromarray(self.current_image))
    self.mainPanel.config(width=max(self.tkimg.width(), 400),
                          height=max(self.tkimg.height(), 400))
    self.mainPanel.create_image(0, 0, image=self.tkimg, anchor=NW)
    self.progLabel.config(text="%04d/%04d" % (self.cur, self.total))

    # load labels
    self.clearBBox()
    self.imagename = 'frame_' + str(self.label_count)
    labelname = self.imagename + '.p'
    self.labelfilename = os.path.join(self.outDir, labelname)
    bbox_cnt = 0
    self.lock = True
def iterate(dtype):
    """Remember, need to run `convert_h_rollouts_to_mine.py` before this.
    """
    if dtype == 'mine':
        for pth in path_mine:
            _, rollouts = process(pth)
            # Do some debugging of the first one
            first = True  # For debugging prints

            for pkl_file in rollouts:
                with open(pkl_file, 'r') as f:
                    data = pickle.load(f)
                print("loaded: {}, has length {}".format(pkl_file, len(data)))
                fig_path_rollout = make_fig_path(pkl_file)

                # Analyze _my_ data, with the HSR. Note the depth image processing.
                # For all grasp-related images we might as well overlay the poses.
                for t_idx, datum in enumerate(data[:-1]):
                    if first:
                        print(datum['side'], datum['type'])
                    c_img = datum['c_img']
                    d_img = datum['d_img']
                    assert c_img.shape == (480, 640, 3) and d_img.shape == (480, 640)
                    d_img = depth_to_net_dim(d_img, robot='HSR')
                    t_str = str(t_idx).zfill(2)
                    pth1 = join(fig_path_rollout, 'c_img_{}.png'.format(t_str))
                    pth2 = join(fig_path_rollout, 'd_img_{}.png'.format(t_str))
                    if datum['type'] == 'grasp':
                        c_img = DP.draw_prediction(c_img, datum['net_pose'])
                        d_img = DP.draw_prediction(d_img, datum['net_pose'])
                    cv2.imwrite(pth1, c_img)
                    cv2.imwrite(pth2, d_img)

                # I saved a final dictionary.
                final_dict = data[-1]
                image_start = final_dict['image_start']
                image_final = final_dict['image_final']
                pth_s = join(fig_path_rollout, 'image_start.png')
                pth_f = join(fig_path_rollout, 'image_final.png')
                cv2.imwrite(pth_s, image_start)
                cv2.imwrite(pth_f, image_final)
                first = False

    elif dtype == 'theirs':
        for pth in path_theirs:
            _, rollouts = process(pth)
            first = True  # For debugging prints

            for pkl_file in rollouts:
                with open(pkl_file, 'r') as f:
                    data = pickle.load(f)
                print("loaded: {}, has length {}".format(pkl_file, len(data)))
                fig_path_rollout = make_fig_path(pkl_file)

                # Analyze _their_ data. We don't need to process the depth
                # image because they saved the processed depth image. See
                # Prakash's email for the keys in each `datum`. It starts at
                # BOTTOM then goes to the top.
                for t_idx, datum in enumerate(data[:-1]):
                    if first:
                        print(datum['side'], datum['pose'], datum['class'])
                        print("({:.1f}, {:.2f}, {:.1f})".format(
                                datum['t_grasp'], datum['t_fwrd_pass'], datum['t_transition']))
                    c_img = datum['c_img']
                    d_img = datum['d_img']
                    assert c_img.shape == (480, 640, 3) and d_img.shape == (480, 640, 3)
                    # Actually they already processed it.
                    #d_img = depth_to_net_dim(d_img, robot='Fetch')
                    o_img = datum['overhead_img']
                    t_str = str(t_idx).zfill(2)
                    pth1 = join(fig_path_rollout, 'c_img_{}.png'.format(t_str))
                    pth2 = join(fig_path_rollout, 'd_img_{}.png'.format(t_str))
                    pth3 = join(fig_path_rollout, 'o_img_{}.png'.format(t_str))
                    cv2.imwrite(pth1, c_img)
                    cv2.imwrite(pth2, d_img)
                    cv2.imwrite(pth3, o_img)

                # I saved a final dictionary to try and match it with my data.
                final_dict = data[-1]
                first = False
def bed_make(self):
    """Runs the pipeline for deployment, testing out bed-making.
    """
    # Get the starting image (from USB webcam). Try a second as well.
    cap = cv2.VideoCapture(0)
    frame = None
    while frame is None:
        ret, frame = cap.read()
        cv2.waitKey(50)
    self.image_start = frame
    cv2.imwrite('image_start.png', self.image_start)
    _, frame = cap.read()
    self.image_start2 = frame
    cv2.imwrite('image_start2.png', self.image_start2)
    cap.release()
    print("NOTE! Recorded `image_start` for coverage evaluation. Was it set up?")

    def get_pose(data_all):
        # See `find_pick_region_labeler` in `p_pi/bed_making/gripper.py`.
        # It's because from the web labeler, we get a bunch of objects.
        # So we have to compute the pose (x,y) from it.
        res = data_all['objects'][0]
        x_min = float(res['box'][0])
        y_min = float(res['box'][1])
        x_max = float(res['box'][2])
        y_max = float(res['box'][3])
        x = (x_max - x_min) / 2.0 + x_min
        y = (y_max - y_min) / 2.0 + y_min
        return (x, y)

    args = self.args
    use_d = BED_CFG.GRASP_CONFIG.USE_DEPTH
    self.get_new_grasp = True
    self.new_grasp = True
    self.rollout_stats = []  # What we actually save for analysis later

    # Add to self.rollout_stats at the end for more timing info
    self.g_time_stats = []     # for _execution_ of a grasp
    self.move_time_stats = []  # for moving to the other side

    while True:
        c_img = self.cam.read_color_data()
        d_img = self.cam.read_depth_data()
        if (not c_img.all() == None and not d_img.all() == None):
            if self.new_grasp:
                self.position_head()
            else:
                self.new_grasp = True
            time.sleep(3)
            c_img = self.cam.read_color_data()
            d_img = self.cam.read_depth_data()
            d_img_raw = np.copy(d_img)  # Needed for determining grasp pose

            # --------------------------------------------------------------
            # Process depth images! Helps network, human, and (presumably) analytic.
            # Obviously the human can see the c_img as well ... hard to compare fairly.
            # --------------------------------------------------------------
            if use_d:
                if np.isnan(np.sum(d_img)):
                    cv2.patchNaNs(d_img, 0.0)
                d_img = depth_to_net_dim(d_img, robot='HSR')
                policy_input = np.copy(d_img)
            else:
                policy_input = np.copy(c_img)

            # --------------------------------------------------------------
            # Run grasp detector to get data=(x,y) point for target, record stats.
            # Note that the web labeler returns a dictionary like this:
            # {'objects': [{'box': (155, 187, 165, 194), 'class': 0}], 'num_labels': 1}
            # but we really want just the 2D grasping point. So use `get_pose()`.
            # Also, for the analytic one, we'll pick the highest point ourselves.
            # --------------------------------------------------------------
            sgraspt = time.time()
            if args.g_type == 'network':
                data = self.g_detector.predict(policy_input)
            elif args.g_type == 'analytic':
                data_all = self.wl.label_image(policy_input)
                data = get_pose(data_all)
            elif args.g_type == 'human':
                data_all = self.wl.label_image(policy_input)
                data = get_pose(data_all)
            egraspt = time.time()
            g_predict_t = egraspt - sgraspt
            print("Grasp predict time: {:.2f}".format(g_predict_t))
            self.record_stats(c_img, d_img_raw, data, self.side, g_predict_t, 'grasp')

            # For safety, we can check the image and abort as needed before execution.
            if use_d:
                img = self.dp.draw_prediction(d_img, data)
            else:
                img = self.dp.draw_prediction(c_img, data)
            caption = 'G Predicted: {} (ESC to abort, other key to proceed)'.format(data)
            call_wait_key(cv2.imshow(caption, img))

            # --------------------------------------------------------------
            # Broadcast grasp pose, execute the grasp, check for success.
            # We'll use the `find_pick_region_net` since the `data` is the
            # (x,y) pose, and not `find_pick_region_labeler`.
            # --------------------------------------------------------------
            self.gripper.find_pick_region_net(
                    pose=data,
                    c_img=c_img,
                    d_img=d_img_raw,
                    count=self.grasp_count,
                    side=self.side,
                    apply_offset=self.apply_offset)
            pick_found, bed_pick = self.check_card_found()

            if self.side == "BOTTOM":
                self.whole_body.move_to_go()
                self.tt.move_to_pose(self.omni_base, 'lower_start')
                tic = time.time()
                self.gripper.execute_grasp(bed_pick, self.whole_body, 'head_down')
                toc = time.time()
            else:
                self.whole_body.move_to_go()
                self.tt.move_to_pose(self.omni_base, 'top_mid')
                tic = time.time()
                self.gripper.execute_grasp(bed_pick, self.whole_body, 'head_up')
                toc = time.time()
            self.g_time_stats.append(toc - tic)
            self.check_success_state(policy_input)
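    # A quick worked example of `get_pose` on the web-labeler dictionary format
    # documented in the comments above (numbers taken from that sample; purely
    # illustrative, not part of the deployment loop):
    #   data_all = {'objects': [{'box': (155, 187, 165, 194), 'class': 0}], 'num_labels': 1}
    #   get_pose(data_all)  # -> (160.0, 190.5), the center of the bounding box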
from fast_grasp_detect.data_aug.draw_cross_hair import DrawPrediction
DP = DrawPrediction()
PATH = '/nfs/diskstation/seita/bed-make/results_post_icra/results_network-white_rollout_24_len_7.p'

if __name__ == "__main__":
    with open(PATH, 'r') as f:
        data = pickle.load(f)
    print("loaded: {}, has length {}".format(PATH, len(data)))

    # Remember, I saved the last data point to have some extra information.
    for t_idx, datum in enumerate(data[:-1]):
        print(datum['side'], datum['type'])
        c_img = datum['c_img']
        d_img = datum['d_img']
        assert c_img.shape == (480, 640, 3) and d_img.shape == (480, 640)
        d_img = depth_to_net_dim(d_img, robot='HSR')
        t_str = str(t_idx).zfill(2)
        pth1 = join('video', 'c_img_{}.png'.format(t_str))
        pth2 = join('video', 'd_img_{}.png'.format(t_str))
        if datum['type'] == 'grasp':
            c_img = DP.draw_prediction(c_img, datum['net_pose'])
            d_img = DP.draw_prediction(d_img, datum['net_pose'])
        cv2.imwrite(pth1, c_img)
        cv2.imwrite(pth2, d_img)


# older stuff
def iterate(dtype):
    """Remember, need to run `convert_h_rollouts_to_mine.py` before this.
    """
c_path = os.path.join(IMG_PATH,
        'ron_num{}_a{}_h{}_rgb.png'.format(num, arm_bin_idx, hs0_bin_idx))
d_path_1ch = os.path.join(IMG_PATH,
        'ron_num{}_a{}_h{}_depth_1ch.png'.format(num, arm_bin_idx, hs0_bin_idx))
assert data['RGBImage'].shape == (480, 640, 3)
assert data['depthImage'].shape == (480, 640)
c_img = (data['RGBImage']).copy()
d_img_1ch = (data['depthImage']).copy()
position = data['markerPos']

# I normally do this for preprocessing:
# https://github.com/DanielTakeshi/fast_grasp_detect/blob/master/src/fast_grasp_detect/data_aug/depth_preprocess.py
# Ron didn't do this but the above preprocessing makes the depth images much
# more human-interpretable.
d_img_3ch = depth_to_net_dim(data['depthImage'])
assert d_img_3ch.shape == (480, 640, 3)
d_path_3ch = d_path_1ch.replace('1ch.png', '3ch.png')

# Overlay images with the marker position, which presumably is the target.
# Update: probably don't need for the c_img, can leave off.
pos = (position[0], position[1])
#cv2.circle(c_img, center=pos, radius=8, color=RED, thickness=-1)
#cv2.circle(c_img, center=pos, radius=10, color=BLACK, thickness=3)
#cv2.circle(d_img_1ch, center=pos, radius=8, color=RED, thickness=-1)
#cv2.circle(d_img_1ch, center=pos, radius=10, color=BLACK, thickness=3)
#cv2.circle(d_img_3ch, center=pos, radius=8, color=RED, thickness=-1)
#cv2.circle(d_img_3ch, center=pos, radius=10, color=BLACK, thickness=3)

cv2.imwrite(c_path, c_img)
cv2.imwrite(d_path_1ch, d_img_1ch)
labels = list()
userinput = 'a'
while userinput != 'q':
    print("rollout", rollout_num)

    # generate random numbers for top/bottom, success/failure of rollout
    top = np.random.randint(2)
    success = np.random.randint(2)
    labels.append(str(success))  # classification labels
    print("Top (1)/Bottom (0): ", top)
    print("Success (1)/Failure (0): ", success)
    userinput = raw_input("press enter to take the picture, or enter q to quit: ")

    if userinput != 'q':
        c_img = camera.read_color_data()
        d_img = camera.read_depth_data()

        # preprocess depth image
        if np.isnan(np.sum(d_img)):
            cv2.patchNaNs(d_img, 0.0)
        d_img = depth_to_net_dim(d_img, robot="HSR")

        # save images
        cv2.imwrite(os.path.join(SAVE_PATH, "rgb_" + str(rollout_num) + ".png"), c_img)
        cv2.imwrite(os.path.join(SAVE_PATH, "depth_" + str(rollout_num) + ".png"), d_img)
        rollout_num += 1

# write labels to output
with open(os.path.join(SAVE_PATH, "labels.txt"), 'w') as fh:
    fh.write("\n".join(labels))
# Debug and accumulate statistics for plotting later.
print("  data['pose']: {}".format(np.array(data['pose'])))
assert data['c_img'].shape == (480, 640, 3)
assert data['d_img'].shape == (480, 640)

# Deal with paths and load the images. Also, process depth images.
num1 = str(fidx).zfill(2)
num2 = str(didx).zfill(3)
c_path = os.path.join(IMG_PATH, 'file_{}_idx_{}_rgb.png'.format(num1, num2))
d_path = os.path.join(IMG_PATH, 'file_{}_idx_{}_depth.png'.format(num1, num2))
c_img = (data['c_img']).copy()
d_img = (data['d_img']).copy()
#cv2.patchNaNs(d_img, 0)  # Shouldn't be needed with HSR data.

# NOTE NOTE NOTE!! This cutoff is HSR and situation dependent!
d_img = depth_to_net_dim(d_img, cutoff=1400)
assert d_img.shape == (480, 640, 3)
pos = tuple(data['pose'])
all_x.append(pos[0])
all_y.append(pos[1])

# Can overlay images with the marker position, which presumably is the target.
cv2.circle(c_img, center=pos, radius=2, color=RED, thickness=-1)
cv2.circle(c_img, center=pos, radius=3, color=BLACK, thickness=1)
cv2.circle(d_img, center=pos, radius=2, color=RED, thickness=-1)
cv2.circle(d_img, center=pos, radius=3, color=BLACK, thickness=1)
cv2.imwrite(c_path, c_img)
cv2.imwrite(d_path, d_img)

print("total images: {}".format(total))