def process_depth_image(depth, crop_size, out_size=300, return_mask=False, crop_y_offset=0):
    imh, imw = depth.shape

    with TimeIt('1'):
        # Crop.
        depth_crop = depth[(imh - crop_size) // 2 - crop_y_offset:(imh - crop_size) // 2 + crop_size - crop_y_offset,
                           (imw - crop_size) // 2:(imw - crop_size) // 2 + crop_size]

    # Inpaint
    # OpenCV inpainting does weird things at the border.
    with TimeIt('2'):
        depth_crop = cv2.copyMakeBorder(depth_crop, 1, 1, 1, 1, cv2.BORDER_DEFAULT)
        depth_nan_mask = np.isnan(depth_crop).astype(np.uint8)

    with TimeIt('3'):
        depth_crop[depth_nan_mask == 1] = 0

    with TimeIt('4'):
        # Scale to keep as float, but has to be in bounds -1:1 to keep opencv happy.
        depth_scale = np.abs(depth_crop).max()
        depth_crop = depth_crop.astype(np.float32) / depth_scale  # Has to be float32, 64 not supported.

    with TimeIt('Inpainting'):
        depth_crop = cv2.inpaint(depth_crop, depth_nan_mask, 1, cv2.INPAINT_NS)

    # Back to original size and value range.
    depth_crop = depth_crop[1:-1, 1:-1]
    depth_crop = depth_crop * depth_scale

    with TimeIt('5'):
        # Resize. (interpolation must be passed by keyword; the third
        # positional argument of cv2.resize is dst, not the interpolation flag.)
        depth_crop = cv2.resize(depth_crop, (out_size, out_size), interpolation=cv2.INTER_AREA)

    if return_mask:
        with TimeIt('6'):
            depth_nan_mask = depth_nan_mask[1:-1, 1:-1]
            depth_nan_mask = cv2.resize(depth_nan_mask, (out_size, out_size), interpolation=cv2.INTER_NEAREST)
        return depth_crop, depth_nan_mask
    else:
        return depth_crop
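# --- Hedged usage sketch (not part of the original file): how process_depth_image
# might be exercised on a synthetic 480x640 depth image containing NaN holes.
# Assumes np (numpy) and the TimeIt context manager are in scope, as they are for
# the function above. The helper name _demo_process_depth_image is hypothetical.
def _demo_process_depth_image():
    depth = np.random.uniform(0.4, 0.6, (480, 640)).astype(np.float32)
    depth[200:210, 300:310] = np.nan  # simulate missing depth readings
    crop, mask = process_depth_image(depth, crop_size=400, out_size=300, return_mask=True)
    assert crop.shape == (300, 300) and mask.shape == (300, 300)
    assert not np.isnan(crop).any()  # the NaN holes have been inpainted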
def compute_service_handler(self, req):
    # Block until a fresh depth image has been received.
    self.waiting = True
    while not self.received:
        rospy.sleep(0.01)
    self.waiting = False
    self.received = False

    with TimeIt('Total'):
        depth = self.curr_depth_img.copy()
        camera_pose = self.last_image_pose
        cam_p = camera_pose.position
        camera_rot = tft.quaternion_matrix(tfh.quaternion_to_list(camera_pose.orientation))[0:3, 0:3]

        # Do grasp prediction
        depth_crop, depth_nan_mask = process_depth_image(depth, self.img_crop_size, 300, return_mask=True, crop_y_offset=self.img_crop_y_offset)
        points, angle, width_img, _ = predict(depth_crop, self.model, process_depth=False, depth_nan_mask=depth_nan_mask, filters=(2.0, 2.0, 2.0))

        # Mask Points Here

        angle_orig = angle
        angle -= np.arcsin(camera_rot[0, 1])  # Correct for the rotation of the camera
        angle = (angle + np.pi / 2) % np.pi - np.pi / 2  # Wrap [-np.pi/2, np.pi/2]

        # Convert to 3D positions.
        imh, imw = depth.shape
        x = ((np.vstack((np.linspace((imw - self.img_crop_size) // 2,
                                     (imw - self.img_crop_size) // 2 + self.img_crop_size,
                                     depth_crop.shape[1], dtype=float), ) * depth_crop.shape[0])
             - self.cam_K[0, 2]) / self.cam_K[0, 0] * depth_crop).flatten()
        y = ((np.vstack((np.linspace((imh - self.img_crop_size) // 2 - self.img_crop_y_offset,
                                     (imh - self.img_crop_size) // 2 + self.img_crop_size - self.img_crop_y_offset,
                                     depth_crop.shape[0], dtype=float), ) * depth_crop.shape[1]).T
             - self.cam_K[1, 2]) / self.cam_K[1, 1] * depth_crop).flatten()
        pos = np.dot(camera_rot, np.stack((x, y, depth_crop.flatten()))).T + np.array([[cam_p.x, cam_p.y, cam_p.z]])

        width_m = width_img / 300.0 * 2.0 * depth_crop * np.tan(self.cam_fov * self.img_crop_size / depth.shape[0] / 2.0 / 180.0 * np.pi)

        best_g_unr = tuple(peak_local_max(points, min_distance=10, threshold_abs=0.02, num_peaks=1)[0])
        best_g = np.ravel_multi_index(best_g_unr, points.shape)

        # Map the best pixel from the 300x300 crop back to full-image coordinates.
        # (This uses the unravelled (row, col) index; the flat index best_g would
        # be meaningless in a pixel-coordinate calculation.)
        max_pixel = ((np.array(best_g_unr) / 300.0 * self.img_crop_size) +
                     np.array([(480 - self.img_crop_size) // 2 - self.img_crop_y_offset,
                               (640 - self.img_crop_size) // 2]))
        max_pixel = np.round(max_pixel)

        ret = GraspPredictionResponse()
        ret.success = True
        g = ret.best_grasp
        g.pose.position.x = pos[best_g, 0]
        g.pose.position.y = pos[best_g, 1]
        g.pose.position.z = pos[best_g, 2]
        g.pose.orientation = tfh.list_to_quaternion(tft.quaternion_from_euler(np.pi, 0, ((angle[best_g_unr] % np.pi) - np.pi / 2)))
        g.width = width_m[best_g_unr]
        g.quality = points[best_g_unr]

        ret.depth = bridge.cv2_to_imgmsg(depth)
        ret.grasp = [max_pixel[0], max_pixel[1], angle_orig[best_g_unr], g.quality, width_img[best_g_unr], width_img[best_g_unr] / 2]

        show = gridshow('Display',
                        [depth_crop, points],
                        [(0.30, 0.55), None, (-np.pi / 2, np.pi / 2)],
                        [cv2.COLORMAP_BONE, cv2.COLORMAP_JET, cv2.COLORMAP_BONE],
                        3,
                        False)
        self.img_pub.publish(bridge.cv2_to_imgmsg(show))

    return ret
def compute_service_handler(self, req):
    # if self.curr_depth_img is None:
    #     rospy.logerr('No depth image received yet.')
    #     rospy.sleep(0.5)

    # if time.time() - self.curr_img_time > 0.5:
    #     rospy.logerr('The Realsense node has died')
    #     return GraspPredictionResponse()

    # Block until a fresh depth image has been received.
    self.waiting = True
    while not self.received:
        rospy.sleep(0.01)
    self.waiting = False
    self.received = False

    with TimeIt('Total'):
        depth = self.curr_depth_img.copy()
        camera_pose = self.last_image_pose
        cam_p = camera_pose.position
        camera_rot = tft.quaternion_matrix(tfh.quaternion_to_list(camera_pose.orientation))[0:3, 0:3]

        # Do grasp prediction
        depth_crop, depth_nan_mask = process_depth_image(depth, self.img_crop_size, 300, return_mask=True, crop_y_offset=self.img_crop_y_offset)
        points, angle, width_img, _ = predict(depth_crop, process_depth=False, depth_nan_mask=depth_nan_mask, filters=(2.0, 2.0, 2.0))

        # Mask Points Here

        angle -= np.arcsin(camera_rot[0, 1])  # Correct for the rotation of the camera
        angle = (angle + np.pi / 2) % np.pi - np.pi / 2  # Wrap [-np.pi/2, np.pi/2]

        # Convert to 3D positions.
        imh, imw = depth.shape
        x = ((np.vstack((np.linspace((imw - self.img_crop_size) // 2,
                                     (imw - self.img_crop_size) // 2 + self.img_crop_size,
                                     depth_crop.shape[1], dtype=float), ) * depth_crop.shape[0])
             - self.cam_K[0, 2]) / self.cam_K[0, 0] * depth_crop).flatten()
        y = ((np.vstack((np.linspace((imh - self.img_crop_size) // 2 - self.img_crop_y_offset,
                                     (imh - self.img_crop_size) // 2 + self.img_crop_size - self.img_crop_y_offset,
                                     depth_crop.shape[0], dtype=float), ) * depth_crop.shape[1]).T
             - self.cam_K[1, 2]) / self.cam_K[1, 1] * depth_crop).flatten()
        pos = np.dot(camera_rot, np.stack((x, y, depth_crop.flatten()))).T + np.array([[cam_p.x, cam_p.y, cam_p.z]])

        width_m = width_img / 300.0 * 2.0 * depth_crop * np.tan(self.cam_fov * self.img_crop_size / depth.shape[0] / 2.0 / 180.0 * np.pi)

        best_g = np.argmax(points)
        best_g_unr = np.unravel_index(best_g, points.shape)

        ret = GraspPredictionResponse()
        ret.success = True
        g = ret.best_grasp
        g.pose.position.x = pos[best_g, 0]
        g.pose.position.y = pos[best_g, 1]
        g.pose.position.z = pos[best_g, 2]
        g.pose.orientation = tfh.list_to_quaternion(tft.quaternion_from_euler(np.pi, 0, ((angle[best_g_unr] % np.pi) - np.pi / 2)))
        g.width = width_m[best_g_unr]
        g.quality = points[best_g_unr]

        show = gridshow('Display',
                        [depth_crop, points],
                        [(0.30, 0.55), None, (-np.pi / 2, np.pi / 2)],
                        [cv2.COLORMAP_BONE, cv2.COLORMAP_JET, cv2.COLORMAP_BONE],
                        3,
                        False)
        self.img_pub.publish(bridge.cv2_to_imgmsg(show))

    return ret
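# --- Clarifying sketch (not from the original repo): the vectorised x/y
# expressions in the two handlers above implement the standard pinhole-camera
# back-projection. For a single full-image pixel at (row v, column u) with
# depth d and intrinsic matrix K, the equivalent scalar form is:
def _backproject_pixel(u, v, d, K):  # hypothetical helper, for illustration only
    x = (u - K[0, 2]) / K[0, 0] * d  # X = (u - cx) / fx * depth
    y = (v - K[1, 2]) / K[1, 1] * d  # Y = (v - cy) / fy * depth
    return x, y, d
# The np.linspace/np.vstack terms simply generate the original (uncropped)
# pixel coordinates for every cell of the 300x300 crop before this formula is
# applied, and camera_rot/cam_p then move the points into the world frame.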
def update_service_handler(self, req):
    """
    Update the GridWorld with a new observation, compute the viewpoint entropy and generate a new command.
    :param req: Ignored
    :return: NextViewpointResponse (success flag, best grasp, velocity command)
    """

    # Some initial checks
    if self.curr_depth_img is None:
        rospy.logerr('No depth image received yet.')
        rospy.sleep(0.5)

    if time.time() - self.curr_img_time > 0.5:
        rospy.logerr('The Realsense node has died')
        return NextViewpointResponse()

    with TimeIt('Total'):
        with TimeIt('Update Histogram'):
            # Step 1: Perform a GG-CNN prediction and update the grid world with the observations
            self.no_viewpoints += 1
            depth = self.curr_depth_img.copy()
            camera_pose = self.last_image_pose
            cam_p = camera_pose.position
            self.position_history.append(np.array([cam_p.x, cam_p.y, cam_p.z, 0]))

            # For display purposes.
            newpos_pixel = self.gw.pos_to_cell(np.array([[cam_p.x, cam_p.y]]))[0]
            self.gw.visited[newpos_pixel[0], newpos_pixel[1]] = self.gw.visited.max() + 1

            camera_rot = tft.quaternion_matrix(tfh.quaternion_to_list(camera_pose.orientation))[0:3, 0:3]

            # Do grasp prediction
            depth_crop, depth_nan_mask = process_depth_image(depth, self.img_crop_size, 300, return_mask=True, crop_y_offset=self.img_crop_y_offset)
            points, angle, width_img, _ = predict(depth_crop, process_depth=False, depth_nan_mask=depth_nan_mask)
            angle -= np.arcsin(camera_rot[0, 1])  # Correct for the rotation of the camera
            angle = (angle + np.pi / 2) % np.pi  # Wrap [0, pi]

            # Convert to 3D positions.
            imh, imw = depth.shape
            x = ((np.vstack((np.linspace((imw - self.img_crop_size) // 2,
                                         (imw - self.img_crop_size) // 2 + self.img_crop_size,
                                         depth_crop.shape[1], dtype=float), ) * depth_crop.shape[0])
                 - self.cam_K[0, 2]) / self.cam_K[0, 0] * depth_crop).flatten()
            y = ((np.vstack((np.linspace((imh - self.img_crop_size) // 2 - self.img_crop_y_offset,
                                         (imh - self.img_crop_size) // 2 + self.img_crop_size - self.img_crop_y_offset,
                                         depth_crop.shape[0], dtype=float), ) * depth_crop.shape[1]).T
                 - self.cam_K[1, 2]) / self.cam_K[1, 1] * depth_crop).flatten()
            pos = np.dot(camera_rot, np.stack((x, y, depth_crop.flatten()))).T + np.array([[cam_p.x, cam_p.y, cam_p.z]])

            # Clean the data a bit.
            pos[depth_nan_mask.flatten() == 1, :] = 0  # Get rid of NaNs
            pos[pos[:, 2] > 0.17, :] = 0  # Ignore obvious noise.
            pos[pos[:, 2] < 0.0, :] = 0  # Ignore obvious noise.

            cell_ids = self.gw.pos_to_cell(pos[:, :2])

            width_m = width_img / 300.0 * 2.0 * depth_crop * np.tan(self.cam_fov * self.img_crop_size / depth.shape[0] / 2.0 / 180.0 * np.pi)

            update_batch([pos[:, 2], width_m.flatten()], cell_ids, self.gw.count,
                         [self.gw.depth_mean, self.gw.width_mean],
                         [self.gw.depth_var, self.gw.width_var])
            update_histogram_angle(points.flatten(), angle.flatten(), cell_ids, self.gw.hist)

        with TimeIt('Calculate Best Grasp'):
            # Step 2: Compute the position of the best grasp in the GridWorld

            # Sum over all angles to get the grasp quality only.
            hist_sum_q = np.sum(self.gw.hist, axis=2)
            weights = np.arange(0.5 / self.hist_bins_q, 1.0, 1 / self.hist_bins_q)
            hist_mean = np.sum(hist_sum_q * weights.reshape((1, 1, -1)), axis=2) / (np.sum(hist_sum_q, axis=2) + 1e-6)

            hist_mean[self.gw.count == 0] = 0  # Ignore areas we haven't seen yet.
            hist_mean[0, :] = 0  # Ignore single pixel along each edge.
            hist_mean[-1, :] = 0
            hist_mean[:, 0] = 0
            hist_mean[:, -1] = 0

            hist_mean -= self.fgw.failures
            hist_mean = np.clip(hist_mean, 0.0, 1.0)

            # ArgMax of grasp quality
            q_am = np.unravel_index(np.argmax(hist_mean), hist_mean.shape)

            # Interpolate position between the neighbours of the best grasp, weighted by quality
            q_ama = np.array(q_am)
            conn_neighbours = np.array([q_ama])  # Disable rounding
            neighbour_weights = hist_mean[conn_neighbours[:, 0], conn_neighbours[:, 1]]
            q_am_neigh = self.gw.cell_to_pos(conn_neighbours)
            q_am_neigh_avg = np.average(q_am_neigh, weights=neighbour_weights, axis=0)
            q_am_pos = (q_am_neigh_avg[0], q_am_neigh_avg[1])  # This is the grasp center

            # Perform same weighted averaging of the angles.
            best_grasp_hist = self.gw.hist[conn_neighbours[:, 0], conn_neighbours[:, 1], :, :]
            angle_weights = np.sum((best_grasp_hist - 1) * weights.reshape((1, 1, -1)), axis=2)
            ang_bins = (np.arange(0.5 / self.hist_bins_a, 1.0, 1 / self.hist_bins_a) * np.pi).reshape(1, -1)

            # Compute the weighted vector mean of the sin/cos components of the angle predictions
            # Do double angles so that -np.pi/2 == np.pi/2, then unwrap
            q_am_ang = np.arctan2(
                np.sum(np.sin(ang_bins * 2) * angle_weights * neighbour_weights.reshape(-1, 1)),
                np.sum(np.cos(ang_bins * 2) * angle_weights * neighbour_weights.reshape(-1, 1))
            )
            if q_am_ang < 0:
                q_am_ang += 2 * np.pi
            q_am_ang = q_am_ang / 2.0 - np.pi / 2

            # Get the depth and width at the grasp center
            q_am_dep = self.gw.depth_mean[q_am]
            q_am_wid = self.gw.width_mean[q_am]

        with TimeIt('Calculate Information Gain'):
            # Step 3: Compute the expected information gain from a viewpoint above every cell in the GridWorld

            # Compute entropy per cell.
            hist_p = hist_sum_q / np.expand_dims(np.sum(hist_sum_q, axis=2) + 1e-6, -1)
            hist_ent = -np.sum(hist_p * np.log(hist_p + 1e-6), axis=2)

            # Treat camera field of view as a Gaussian
            # Field of view in number of gridworld cells
            fov = int(cam_p.z * 2 * np.tan(self.cam_fov * self.img_crop_size / depth.shape[0] / 2.0 / 180.0 * np.pi) / self.gw.res)
            exp_inf_gain = gaussian_filter(hist_ent, fov / 6, truncate=3)

            # Track changes by KL Divergence (not used/disabled by default)
            kl_divergence = np.sum(hist_p * np.log((hist_p + 1e-6) / (self.gw.hist_p_prev + 1e-6)), axis=2)
            self.gw.hist_p_prev = hist_p
            kl_divergence[0, :] = 0
            kl_divergence[-1, :] = 0
            kl_divergence[:, 0] = 0
            kl_divergence[:, -1] = 0
            norm_i_gain = 1 - np.exp(-1 * kl_divergence.sum())
            self.position_history[-1][-1] = norm_i_gain

        with TimeIt('Calculate Travel Cost'):
            # Step 4: Compute cost of moving away from the best detected grasp.

            # Distance from current robot pos.
            d_from_robot = np.sqrt((self._xv - cam_p.x)**2 + (self._yv - cam_p.y)**2)

            # Distance from best detected grasp, weighted by the robot's current height (Z axis)
            d_from_best_q = np.sqrt((self._xv - q_am_pos[0])**2 + (self._yv - q_am_pos[1])**2)  # Cost of moving away from the best grasp.
            height_weight = (cam_p.z - self.height[1]) / (self.height[0] - self.height[1]) + 1e-2
            height_weight = max(min(height_weight, 1.0), 0.0)
            best_cost = (d_from_best_q / self.dist_from_best_scale) * (1 - height_weight) * self.dist_from_best_gain

            # Distance from previous viewpoints (dist_from_prev_view_gain is 0 by default)
            d_from_prev_view = np.zeros(self.gw.shape)
            for x, y, z, kl in self.position_history:
                d_from_prev_view += np.clip(1 - (np.sqrt((self._xv - x)**2 + (self._yv - y)**2 + 0 * (cam_p.z - z)**2) / self.dist_from_prev_view_scale), 0, 1) * (1 - kl)
            prev_view_cost = d_from_prev_view * self.dist_from_prev_view_gain

            # Calculate total expected information gain.
            exp_inf_gain_before = exp_inf_gain.copy()
            exp_inf_gain -= best_cost
            exp_inf_gain -= prev_view_cost

            # Compute local direction of maximum information gain
            exp_inf_gain_mask = exp_inf_gain.copy()
            greedy_window = 0.1
            exp_inf_gain_mask[d_from_robot > greedy_window] = exp_inf_gain.min()
            ig_am = np.unravel_index(np.argmax(exp_inf_gain_mask), exp_inf_gain.shape)
            maxpos = self.gw.cell_to_pos([ig_am])[0]
            diff = (maxpos - np.array([cam_p.x, cam_p.y])) / greedy_window
            # Maximum of 1
            if np.linalg.norm(diff) > 1.0:
                diff = diff / np.linalg.norm(diff)

        with TimeIt('Response'):
            # Step 5: Generate a Response
            ret = NextViewpointResponse()
            ret.success = True
            ret.no_viewpoints = self.no_viewpoints

            # xyz velocity normalised to 1
            ret.velocity_cmd.linear.x = diff[0]
            ret.velocity_cmd.linear.y = diff[1]
            ret.velocity_cmd.linear.z = -1 * np.sqrt(1 - diff[0]**2 - diff[1]**2)

            # Grasp pose
            ret.best_grasp.pose.position.x = q_am_pos[0]
            ret.best_grasp.pose.position.y = q_am_pos[1]
            ret.best_grasp.pose.position.z = q_am_dep
            q = tft.quaternion_from_euler(np.pi, 0, q_am_ang - np.pi / 2)
            ret.best_grasp.pose.orientation = tfh.list_to_quaternion(q)
            ret.best_grasp.quality = hist_mean[q_am[0], q_am[1]]
            ret.best_grasp.width = q_am_wid
            ret.best_grasp.entropy = hist_ent[q_am[0], q_am[1]]

            # Normalise for plotting purposes
            exp_inf_gain = (exp_inf_gain - exp_inf_gain.min()) / (exp_inf_gain.max() - exp_inf_gain.min()) * (exp_inf_gain_before.max() - exp_inf_gain_before.min())
            show = gridshow('Display',
                            [cv2.resize(points, hist_ent.shape), hist_mean, hist_ent, exp_inf_gain, exp_inf_gain_before, self.gw.visited],
                            [None, None, None, (exp_inf_gain.min(), exp_inf_gain_before.max()), (exp_inf_gain.min(), exp_inf_gain_before.max()), None],
                            [cv2.COLORMAP_JET] + [cv2.COLORMAP_JET, ] * 4 + [cv2.COLORMAP_BONE],
                            3,
                            False)
            self.img_pub.publish(bridge.cv2_to_imgmsg(show))

        # For dumping things to npz files
        if False:
            kwargs = {
                'M': self.gw.hist,
                'depth_crop': depth_crop,
                'points': points,
                'hist_sum_q': hist_sum_q,
                'hist_mean': hist_mean,
                'q_am': q_am,
                'q_am_pos': q_am_pos,
                'best_grasp_hist': best_grasp_hist,
                'hist_ent': hist_ent,
                'best_cost': best_cost,
                'exp_inf_gain': exp_inf_gain,
                'pos_history': np.array(self.position_history),
                'visited': self.gw.visited,
                'depth': depth_crop,
                'v': diff
            }
            np.savez('/home/guest/numpy_out/%d.npz' % self.counter, **kwargs)
            self.counter += 1

    return ret
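# --- Toy check (not part of the original file) of the double-angle circular
# mean used for q_am_ang above. Grasp angles are pi-periodic, so -pi/2 and
# +pi/2 are the same grasp; doubling the angles maps them onto the full circle
# where an ordinary weighted vector mean is well defined. The helper name
# _mean_grasp_angle is hypothetical; it works on angles in [-pi/2, pi/2),
# whereas the code above additionally shifts by -pi/2 because its histogram
# bins live in [0, pi].
def _mean_grasp_angle(angles, weights):
    s = np.sum(np.sin(2 * angles) * weights)
    c = np.sum(np.cos(2 * angles) * weights)
    return np.arctan2(s, c) / 2.0  # halve to map back to [-pi/2, pi/2]
# e.g. _mean_grasp_angle(np.array([np.pi/2 - 0.1, -np.pi/2 + 0.1]), np.ones(2))
# returns ~ +/- pi/2 (the same grasp orientation as both inputs), whereas the
# naive arithmetic mean would give the meaningless answer 0.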
def depth_callback(depth_message):
    global prev_mp
    global fx, cx, fy, cy

    with TimeIt('Predict'):
        depth = bridge.imgmsg_to_cv2(depth_message)

        # Crop a square out of the middle of the depth and resize it to 300*300
        crop_size = 400
        crop_offset = 0
        out_size = 300
        points_out, ang_out, width_out, depth_crop = predict(depth, crop_size, out_size=out_size, crop_y_offset=crop_offset)

    with TimeIt('Calculate Depth'):
        # Figure out roughly the depth in mm of the part between the grippers for collision avoidance.
        depth_center = depth_crop[100:141, 130:171].flatten()
        depth_center.sort()
        depth_center = depth_center[:10].mean() * 1000.0

    with TimeIt('Control'):
        # Calculate the best pose from the camera intrinsics.
        maxes = None

        ALWAYS_MAX = True  # Use ALWAYS_MAX = True for the open-loop solution.

        if ALWAYS_MAX:
            # Track the global max.
            max_pixel = np.array(np.unravel_index(np.argmax(points_out), points_out.shape))
            prev_mp = max_pixel.astype(int)
        else:
            # Calculate a set of local maxes. Choose the one that is closest to the previous one.
            maxes = peak_local_max(points_out, min_distance=10, threshold_abs=0.1, num_peaks=3)
            if maxes.shape[0] == 0:
                rospy.logerr('No Local Maxes')
                return
            max_pixel = maxes[np.argmin(np.linalg.norm(maxes - prev_mp, axis=1))]

            # Keep a global copy for next iteration.
            prev_mp = (max_pixel * 0.25 + prev_mp * 0.75).astype(int)

        ang = ang_out[max_pixel[0], max_pixel[1]]
        width = width_out[max_pixel[0], max_pixel[1]]

        # Convert max_pixel back to uncropped/resized image coordinates in order to do the camera transform.
        max_pixel = ((np.array(max_pixel) / out_size * crop_size) + np.array([(480 - crop_size) // 2 - crop_offset, (640 - crop_size) // 2]))
        max_pixel = np.round(max_pixel).astype(int)

        point_depth = depth[max_pixel[0], max_pixel[1]]

        # Compute the actual position.
        x = (max_pixel[1] - cx) / fx * point_depth
        y = (max_pixel[0] - cy) / fy * point_depth
        z = point_depth

        if np.isnan(z):
            return

    with TimeIt('Draw'):
        # Draw grasp markers on the points_out and publish it. (for visualisation)
        grasp_img = cv2.applyColorMap((points_out * 255).astype(np.uint8), cv2.COLORMAP_JET)
        grasp_img_plain = grasp_img.copy()

        rr, cc = circle(prev_mp[0], prev_mp[1], 5)
        grasp_img[rr, cc, 0] = 0
        grasp_img[rr, cc, 1] = 255
        grasp_img[rr, cc, 2] = 0

    with TimeIt('Publish'):
        # Publish the output images (not used for control, only visualisation)
        grasp_img = bridge.cv2_to_imgmsg(grasp_img, 'bgr8')
        grasp_img.header = depth_message.header
        grasp_pub.publish(grasp_img)

        grasp_img_plain = bridge.cv2_to_imgmsg(grasp_img_plain, 'bgr8')
        grasp_img_plain.header = depth_message.header
        grasp_plain_pub.publish(grasp_img_plain)

        depth_pub.publish(bridge.cv2_to_imgmsg(depth_crop, encoding=depth_message.encoding))
        ang_pub.publish(bridge.cv2_to_imgmsg(ang_out))

        # Output the best grasp pose relative to camera.
        cmd_msg = Float32MultiArray()
        cmd_msg.data = [x, y, z, ang, width, depth_center]
        cmd_pub.publish(cmd_msg)
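# --- Hedged sketch (not in the original file) of the node wiring implied by the
# globals that depth_callback uses. The node and topic names below are
# assumptions for illustration, not the repo's exact configuration, and
# fx/cx/fy/cy are assumed to be filled in once from a sensor_msgs/CameraInfo
# message (K = [fx 0 cx; 0 fy cy; 0 0 1]) before the first depth frame arrives.
import rospy
from sensor_msgs.msg import Image
from std_msgs.msg import Float32MultiArray
from cv_bridge import CvBridge

bridge = CvBridge()

rospy.init_node('ggcnn_detection')
grasp_pub = rospy.Publisher('ggcnn/img/grasp', Image, queue_size=1)
grasp_plain_pub = rospy.Publisher('ggcnn/img/grasp_plain', Image, queue_size=1)
depth_pub = rospy.Publisher('ggcnn/img/depth', Image, queue_size=1)
ang_pub = rospy.Publisher('ggcnn/img/ang', Image, queue_size=1)
cmd_pub = rospy.Publisher('ggcnn/out/command', Float32MultiArray, queue_size=1)
rospy.Subscriber('/camera/depth/image_rect_raw', Image, depth_callback, queue_size=1)
rospy.spin()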