def icp_refinement(pts_tgt, obj_model, rot_pred, tra_pred, cam_K, ren): centroid_tgt = np.array([ np.mean(pts_tgt[:, 0]), np.mean(pts_tgt[:, 1]), np.mean(pts_tgt[:, 2]) ]) if (tra_pred[2] < 300 or tra_pred[2] > 5000): tra_pred = centroid_tgt * 1000 #opengl renderer (bit slower, more precise) if (gpu_rendering): img_init, depth_init = render_obj_gpu(obj_model, rot_pred, tra_pred / 1000, cam_K, ren) else: img_init, depth_init = render_obj(obj_models[obj_order_id], rot_pred, tra_pred / 1000, cam_K, ren) init_mask = depth_init > 0 bbox_init = get_bbox_from_mask(init_mask > 0) tf = np.eye(4) if (bbox_init[2] - bbox_init[0] < 10 or bbox_init[3] - bbox_init[1] < 10): return tf, -1 if (np.sum(init_mask) < 10): return tf, -1 points_src = np.zeros( (bbox_init[2] - bbox_init[0], bbox_init[3] - bbox_init[1], 6), np.float32) points_src[:, :, :3] = getXYZ(depth_init, cam_K[0, 0], cam_K[1, 1], cam_K[0, 2], cam_K[1, 2], bbox_init) points_src[:, :, 3:] = get_normal(depth_init, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2], refine=True, bbox=bbox_init) points_src = points_src[init_mask[bbox_init[0]:bbox_init[2], bbox_init[1]:bbox_init[3]] > 0] #adjust the initial translation using centroids of visible points centroid_src = np.array([ np.mean(points_src[:, 0]), np.mean(points_src[:, 1]), np.mean(points_src[:, 2]) ]) trans_adjust = centroid_tgt - centroid_src tra_pred = tra_pred + trans_adjust * 1000 points_src[:, :3] += trans_adjust icp_fnc = cv2.ppf_match_3d_ICP(100, tolerence=0.05, numLevels=4) #1cm retval, residual, pose = icp_fnc.registerModelToScene( points_src.reshape(-1, 6), pts_tgt.reshape(-1, 6)) tf[:3, :3] = rot_pred tf[:3, 3] = tra_pred / 1000 #in m tf = np.matmul(pose, tf) return tf, residual
def icp_refinement(self, pts_tgt, obj_model, rot_pred, tra_pred): centroid_tgt = np.array([ np.mean(pts_tgt[:, 0]), np.mean(pts_tgt[:, 1]), np.mean(pts_tgt[:, 2]) ]) if (tra_pred[2] < 300 or tra_pred[2] > 5000): #when estimated translation is weired, set centroid of tgt points as translation tra_pred = centroid_tgt * 1000 img_init, depth_init = self.render_obj_pyrender( obj_model, rot_pred, tra_pred / 1000) init_mask = depth_init > 0 bbox_init = get_bbox_from_mask(init_mask > 0) if (bbox_init[2] - bbox_init[0] < 10 or bbox_init[3] - bbox_init[1] < 10): return tf, -1 if (np.sum(init_mask) < 10): return tf, -1 points_src = np.zeros( (bbox_init[2] - bbox_init[0], bbox_init[3] - bbox_init[1], 6), np.float32) points_src[:, :, :3] = getXYZ(depth_init, self.camK[0, 0], self.camK[1, 1], self.camK[0, 2], self.camK[1, 2], bbox_init) points_src[:, :, 3:] = get_normal(depth_init, fx=self.camK[0, 0], fy=self.camK[1, 1], cx=self.camK[0, 2], cy=self.camK[1, 2], refine=True, bbox=bbox_init) points_src = points_src[init_mask[bbox_init[0]:bbox_init[2], bbox_init[1]:bbox_init[3]] > 0] centroid_src = np.array([ np.mean(points_src[:, 0]), np.mean(points_src[:, 1]), np.mean(points_src[:, 2]) ]) trans_adjust = centroid_tgt - centroid_src tra_pred = tra_pred + trans_adjust * 1000 points_src[:, :3] += trans_adjust icp_fnc = cv2.ppf_match_3d_ICP(100, tolerence=0.05, numLevels=5) #1cm retval, residual, pose = icp_fnc.registerModelToScene( points_src.reshape(-1, 6), pts_tgt.reshape(-1, 6)) tf = np.eye(4) tf[:3, :3] = rot_pred tf[:3, 3] = tra_pred / 1000 #in m tf = np.matmul(pose, tf) return tf, residual
inst_count_good = np.zeros((len(inst_counts))) depth_valid = np.logical_and(depth_t > 0.2, depth_t < 2.2) rgb_valid = np.logical_or(depth_valid, depth_t_zero_nan == 0) image_t = image_t.astype(np.float32) image_t[np.invert(rgb_valid)] = 0.1 * image_t[np.invert(rgb_valid)] points_tgt = np.zeros((depth_t.shape[0], depth_t.shape[1], 6), np.float32) points_tgt[:, :, :3] = getXYZ(depth_t, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2]) points_tgt[:, :, 3:] = get_normal(depth_t, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2], refine=True) if (detect_type == 'rcnn'): rois, obj_orders, obj_ids, scores, masks = get_rcnn_detection( image_t, model) elif (detect_type == 'retinanet'): rois, obj_orders, obj_ids, scores = get_retinanet_detection( image_t, model) result_score = [] result_objid = [] result_R = [] result_t = [] result_roi = []
def callback(self, r_image): self.sub.unregister() timeout = 1 t_spend = 0 if (self.icp): while not (self.have_depth): time.sleep(0.01) t_spend += 0.01 if (t_spend > 1): break self.sub_depth.unregister() with self.graph.as_default(): depth_t = np.zeros((self.im_height, self.im_width)) if (self.have_depth): depth_t = np.copy(self.depth_img) depth_t = np.nan_to_num(depth_t) depth_valid = np.logical_and(depth_t > 0.2, depth_t < 3) if (np.max(depth_t) == 0): self.have_depth = False else: points_tgt = np.zeros( (depth_t.shape[0], depth_t.shape[1], 6), np.float32) points_tgt[:, :, :3] = getXYZ(depth_t, fx=self.camK[0, 0], fy=self.camK[1, 1], cx=self.camK[0, 2], cy=self.camK[1, 2]) points_tgt[:, :, 3:] = get_normal(depth_t, fx=self.camK[0, 0], fy=self.camK[1, 1], cx=self.camK[0, 2], cy=self.camK[1, 2], refine=True) data = ros_numpy.numpify(r_image) image = np.copy(data) bbox_pred = np.zeros((4), np.int) rois, obj_orders, obj_ids, scores, masks = self.get_rcnn_detection( image) result_scores = [] result_poses = [] result_poses_before = [] result_ids = [] result_bbox = [] img_detection = np.copy(image) img_pose = np.copy(image) if (self.icp): img_pose_noicp = np.copy(image) for r_id, roi in enumerate(rois): if (roi[0] == -1 and roi[1] == -1): continue obj_id = obj_ids[r_id] if (detect_type == 'rcnn'): mask_from_detect = masks[:, :, r_id] r_ = int(self.colors[obj_orders[r_id], 0]) g_ = int(self.colors[obj_orders[r_id], 1]) b_ = int(self.colors[obj_orders[r_id], 2]) img_detection[mask_from_detect, 0] = np.minimum( 255, img_detection[mask_from_detect, 0] + 0.8 * r_) img_detection[mask_from_detect, 1] = np.minimum( 255, img_detection[mask_from_detect, 1] + 0.8 * g_) img_detection[mask_from_detect, 2] = np.minimum( 255, img_detection[mask_from_detect, 2] + 0.8 * b_) cv2.rectangle(img_detection, (roi[1], roi[0]), (roi[3], roi[2]), (r_, g_, b_), 2) cv2.putText(img_detection, '{}'.format(obj_id), (roi[1], roi[0]), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, 1) if not (obj_id in self.target_objs): continue pix2pose_id = self.target_objs.index(obj_id) _,mask_pred,rot_pred,tra_pred,frac_inlier,_ =\ self.obj_pix2pose[pix2pose_id].est_pose(image,roi.astype(np.int)) if (frac_inlier == -1): continue pred_tf_ori = np.eye(4) pred_tf_ori[:3, :3] = rot_pred pred_tf_ori[:3, 3] = tra_pred * self.model_scale if (detect_type == 'rcnn'): union_mask = np.logical_or(mask_from_detect, mask_pred) union = np.sum(union_mask) if (union == 0): mask_iou = 0 score = 0 else: mask_iou = np.sum( np.logical_and(mask_from_detect, mask_pred)) / union score = scores[r_id] * frac_inlier * mask_iou * 1000 if (self.have_depth and self.icp): union_mask = np.logical_and(union_mask, depth_valid) pts_tgt = points_tgt[union_mask] tf, residual = self.icp_refinement( pts_tgt, self.obj_models[pix2pose_id], rot_pred, tra_pred) if (residual == -1): continue rot_pred = tf[:3, :3] tra_pred = tf[:3, 3] * 1000 score = scores[r_id] / (residual + 0.00001) else: score = scores[r_id] result_scores.append(score) tra_pred = tra_pred * self.model_scale #mm to m if (tra_pred[2] < 0.1 or tra_pred[2] > 5): continue pred_tf = np.eye(4) pred_tf[:3, :3] = rot_pred pred_tf[:3, 3] = tra_pred result_poses.append(pred_tf) result_ids.append(pix2pose_id) result_bbox.append(roi) result_poses_before.append(pred_tf_ori) #currently publish pose of the top scored objects self.detect_pub.publish( ros_numpy.msgify(ros_image, img_detection[:, :, ::-1], encoding='bgr8')) #render detection results #render pose estimation results for o_id, tf, score, roi in zip(result_ids, result_poses, result_scores, result_bbox): if (self.icp): img_pose = self.draw_3d_poses(self.obj_bboxes[o_id], tf, img_pose) else: img_pose = self.draw_3d_poses(self.obj_bboxes[o_id], tf, img_pose) cv2.putText(img_pose, '{:.3f}'.format(score), (roi[1], roi[0]), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, 1) if (self.icp and self.pub_before_icp): for o_id, tf in zip(result_ids, result_poses_before): img_pose_noicp = self.draw_3d_poses( self.obj_bboxes[o_id], tf, img_pose_noicp) self.pose_pub_noicp.publish( ros_numpy.msgify(ros_image, img_pose_noicp[:, :, ::-1], encoding='bgr8')) self.pose_pub.publish( ros_numpy.msgify(ros_image, img_pose[:, :, ::-1], encoding='bgr8')) self.sub = rospy.Subscriber(self.rgb_topic, ros_image, self.callback, queue_size=1) if (self.icp): self.sub_depth = rospy.Subscriber(self.depth_topic, ros_image, self.callback_depth, queue_size=1) self.have_depth = False