def calcDistance(self): self.newObjArray = Detection3DArray() for obj in self.lastObj.detections: # keep old values newObj = Detection3D() newObj.bbox.center.position.x = obj.bbox.center.x newObj.bbox.center.position.y = obj.bbox.center.y newObj.bbox.size.x = obj.bbox.size_x newObj.bbox.size.y = obj.bbox.size_y # calculate values angle, angle_min, angle_max = self.calcAngle( obj.bbox.center.x, obj.bbox.size_x) distance = self.getDistance(angle, angle_min, angle_max) if distance == None: distance = 0 # add calculated values newObj.bbox.center.orientation.w = angle newObj.bbox.center.position.z = distance self.newObjArray.detections.append(newObj) self.lastPub = self.newObjArray self.distance_pub.publish(self.newObjArray)
def __init__(self): # init Node rospy.init_node("distance_node") rospy.loginfo("Starting DistanceNode.") # Subscriber/Publisher rospy.Subscriber("/scan", LaserScan, self.lidar_calback) rospy.Subscriber("/objectDedector/objects", Detection2DArray, self.obj_callback) self.distance_pub = rospy.Publisher("/objectDedector/3Dobjects", Detection3DArray, queue_size=1) # get params self.cam_width = float(rospy.get_param("/usb_cam/image_width")) self.flip_image = bool( rospy.get_param("/usb_cam/flip_image", default="True")) cam_fov = float(rospy.get_param("/usb_cam/fov")) self.angleFactor = cam_fov / self.cam_width # init messages self.lastScan = LaserScan() self.lastScan.angle_increment = 1 self.lastObj = Detection2DArray() self.lastPub = Detection3DArray() # publish image with distances if True: rospy.Subscriber("/objectDedector/overlayImage", Image, self.img_callback) self.img_pub = rospy.Publisher("/objectDedector/overlayImage3D", Image, queue_size=1) self.bridge = CvBridge() self.font = cv2.FONT_HERSHEY_SIMPLEX
def filter_dbscan(self, detections_msg): # Get frame_id for drone body frame: 'drone_N/camera_array' -> 'drone_N' frame_id = detections_msg.header.frame_id.split('/')[0] + '/base_link' # Return early if we only have zero or one detection if len(detections_msg.detections) <= 1: detections_msg.header.frame_id = frame_id return detections_msg positions = [] for detection in detections_msg.detections: p = detection.bbox.center.position position = np.array([p.x, p.y]) if self.dimensions == 2 \ else np.array([p.x, p.y, p.z]) positions.append(position) positions = np.array(positions) # Greedily find all positions with distance less than threshold self.dbscan.fit(positions) # Merge close positions as the average of all cluster positions labels = np.array(self.dbscan.labels_) indices = set(labels) filtered = np.array( [positions[labels == i].mean(axis=0) for i in indices]) filtered_detections = Detection3DArray() filtered_detections.header.frame_id = frame_id filtered_detections.header.stamp = detections_msg.header.stamp for filtered_detection in filtered: detection = Detection3D() detection.header = detections_msg.header detection.bbox.center.position.x = filtered_detection[0] detection.bbox.center.position.y = filtered_detection[1] if self.dimensions == 3: detection.bbox.center.position.z = filtered_detection[2] filtered_detections.detections.append(detection) return filtered_detections
def callback_detect_3d(self, req): try: image = rospy.wait_for_message("~input", Image, timeout=2) except rospy.ROSException as e: s = "could not get image from '" + rospy.resolve_name( "~input") + "'" rospy.logerr(s) raise rospy.ServiceException(s) resp = Detect3DResponse() detections = self._get_detections(image) for d2d in detections.detections: d3d = Detection3D() d3d.header = d2d.header d3d.results = d2d.results # todo calc bbox real poses d3d.bbox.center.position.x = d2d.bbox.center.x - 0.5 d3d.bbox.center.position.y = d2d.bbox.center.y - 0.5 d3d.bbox.center.position.z = 1 d3d.bbox.center.orientation.w = 1 d3d.bbox.size.x = 0.1 d3d.bbox.size.y = 0.1 d3d.bbox.size.z = 0.1 resp.detections.append(d3d) if len(detections.detections) > 0 and self.publish_detections: msg = Detection3DArray() msg.header = d3d.header msg.detections = resp.detections self.pub.publish(msg) return resp
def detections_callback(self, raw_msg): use_dbscan, use_gmphd = self.config['use_dbscan'], self.config[ 'use_gmphd'] filtered_msg = Detection3DArray() filtered_msg.header = raw_msg.header # Filter detections of same object from multiple cameras using DBSCAN # DBSCAN guarantees that we only have one observation per true object filtered_msg = self.filter_dbscan(raw_msg) if use_dbscan else raw_msg # Filter detections with GM-PHD filter # GM-PHD requires at most one observation per true object filtered_msg = self.filter_gmphd( filtered_msg) if use_gmphd else filtered_msg # Log information # num_raw = len(raw_msg.detections) num_filtered = len(filtered_msg.detections) # rospy.loginfo('Detections raw/filtered: {}/{}'.format(num_raw, num_filtered)) # Publish everything self.detections_pub.publish(filtered_msg) self.num_target_pub.publish(Int32(num_filtered))
def inference(self, rgb, depth, pcl_msg): ## 1. Get rgb, depth, point cloud start_time = time.time() camera_header = rgb.header rgb = self.bridge.imgmsg_to_cv2(rgb, desired_encoding='bgr8') rgb = PIL.Image.fromarray(np.uint8(rgb), mode="RGB") rgb_img = np.uint8(rgb.resize((self.params["width"], self.params["height"]), PIL.Image.BICUBIC)) # rgb_rect = self.bridge.imgmsg_to_cv2(rgb_rect, desired_encoding='bgr8') # rgb_rect = PIL.Image.fromarray(np.uint8(rgb_rect), mode="RGB") # rgb_rect_img = np.uint8(rgb_rect.resize((self.params["width"], self.params["height"]), PIL.Image.BICUBIC)) depth = self.bridge.imgmsg_to_cv2(depth, desired_encoding='32FC1') depth = cv2.resize(depth, dsize=(self.params["width"], self.params["height"]), interpolation=cv2.INTER_AREA) rgb = self.rgb_transform(rgb_img).unsqueeze(0) print("-------------------------------------") # print("=> Get Input \t {}".format(time.time()-start_time)) ## 2. Instance Segmentation using Mask R-CNN is_results = self.model(rgb.to(self.device))[0] pred_masks = is_results["masks"].cpu().detach().numpy() pred_boxes = is_results['boxes'].cpu().detach().numpy() pred_labels = is_results['labels'].cpu().detach().numpy() pred_scores = is_results['scores'].cpu().detach().numpy() boxes, scores, is_obj_ids, rgb_crops, is_masks = [], [], [], [], [] for i, (label, (x1, y1, x2, y2), score, mask) in enumerate(zip(pred_labels, pred_boxes, pred_scores, pred_masks)): if score < self.params["is_thresh"]: continue if x1 < self.roi[0] or x2 > self.roi[1] or y1 < self.roi[2] or y2 > self.roi[3]: continue boxes.append(np.array([x1, y1, x2, y2])) scores.append(score) label = label-1 is_obj_ids.append(label) is_masks.append(mask[0]) if self.use_masked_crop[label]: mask[mask >= 0.5] = 1 mask[mask < 0.5] = 0.3 mask = np.repeat(mask, 3, 0) mask = np.transpose(mask, (1, 2, 0)) masked_img = rgb_img * mask else: masked_img = rgb_img x = int(x1) y = int(y1) h = int(y2 - y1) w = int(x2 - x1) if self.use_sameWH_crop[label]: w_offset = int(np.maximum(h, w) * self.pad_factors[label]) h_offset = int(np.maximum(h, w) * self.pad_factors[label]) else: w_offset = int(w * self.pad_factors[label]) h_offset = int(h * self.pad_factors[label]) left = np.maximum(x+w//2-w_offset//2, 0) right = x+w//2+w_offset/2 top = np.maximum(y+h//2-h_offset//2, 0) bottom = y+h//2+h_offset//2 rgb_crop = masked_img[top:bottom, left:right].copy() if self.params["black_borders"]: rgb_crop[:(y-top),:] = 0 rgb_crop[(y+h-top):,:] = 0 rgb_crop[:,:(x-left)] = 0 rgb_crop[:,(x+w-left):] = 0 rgb_crop = cv2.resize(rgb_crop, (128, 128), interpolation=cv2.INTER_NEAREST) rgb_crops.append(rgb_crop) # print("=> Inst Seg \t {}".format(time.time()-start_time)) ## 3. 6D Object Pose Estimation using MPAAE if len(is_masks) == 0: return all_pose_estimates = [] for i, (box, score, label, rgb_crop) in enumerate(zip(boxes, scores, is_obj_ids, rgb_crops)): box_xywh = [box[0], box[1], box[2]-box[0], box[3]-box[1]] H_aae = np.eye(4) class_idx = self.pe_class_names.index(self.is_class_names[label]) R_est, t_est, _ = self.codebook.auto_pose6d(self.sess, rgb_crop, box_xywh, self.intrinsic_matrix, 1, self.train_args, self.codebook._get_codebook_name(self.ply_centered_paths[class_idx]), refine=False) H_aae[:3,:3] = R_est H_aae[:3, 3] = t_est all_pose_estimates.append(H_aae) # print("=> Pose Est \t {}".format(time.time()-start_time)) ## 4. 6D Object Pose Refinement using ICP if "merged" in self.params["point"]: cloud_map = convert_ros_to_o3d(pcl_msg) cloud_cam = do_transform_o3d_cloud(copy.deepcopy(cloud_map), self.transform_map_to_cam) else: cloud_cam = convert_ros_to_o3d(pcl_msg, remove_nans=True) aae_trans = [] aae_rot = [] icp_trans = [] icp_rot = [] for i, (pose_estimate, label) in enumerate(zip(all_pose_estimates, is_obj_ids)): # crop cloud with instance mask is_mask = is_masks[i].copy() is_mask[is_mask < 0.5] = 0 cloud_cam_obj = crop_o3d_cloud_with_mask(cloud_cam, is_mask, camera_info=self.camera_info) pe_obj_id = self.pe_class_names.index(self.is_class_names[label]) # cloud_cam_obj, index = cloud_cam_obj.remove_statistical_outlier(nb_neighbors = 50, std_ratio=0.2) # transform cloud_obj to the origin of camera frame cloud_obj = copy.deepcopy(self.cloud_objs[pe_obj_id]) H_obj2cam = np.eye(4) # add centeroids for misaligned CAD H_obj2cam[:3, 3] = - cloud_obj.get_center() + self.centroids[pe_obj_id] cloud_obj = cloud_obj.transform(H_obj2cam) # transform cloud_obj to the estimated 6d pose H_aae_cam2obj = np.eye(4) H_aae_cam2obj[:3, :3] = pose_estimate[:3, :3] H_aae_cam2obj[:3, 3] = pose_estimate[:3, 3] # align scale cloud_obj = cloud_obj.transform(H_aae_cam2obj) # translate cloud_obj to the centroid of cloud cam H_obj_to_cam_centroid = cloud_cam_obj.get_center() - cloud_obj.get_center() # print(H_aae_cam2obj[:3, 3][2], H_obj_to_cam_centroid[2]) H_aae_cam2obj[:3, 3] = H_aae_cam2obj[:3, 3] + H_obj_to_cam_centroid all_pose_estimates[i][:3, 3] = H_aae_cam2obj[:3, 3] * 1000 cloud_obj = cloud_obj.translate(H_obj_to_cam_centroid) # o3d.visualization.draw_geometries([cloud_obj, cloud_cam_obj]) # icp refinement icp_result, residual = icp_refinement_with_ppf_match(cloud_obj, cloud_cam_obj, n_points=self.params["n_points"], n_iter=self.params["n_iter"], tolerance=self.params["tolerance"], num_levels=self.params["num_levels"]) # print("{}: \t res: {}".format(self.is_class_names[label], residual)) if residual < 1: H_refined_cam2obj = np.matmul(icp_result, H_aae_cam2obj) else: H_refined_cam2obj = H_aae_cam2obj all_pose_estimates[i][:3, :3] = H_refined_cam2obj[:3, :3] T_centroid = np.eye(4) T_centroid[:3, 3] = self.centroids[pe_obj_id] translation = H_aae_cam2obj[:3, 3] rotation = tf_trans.quaternion_from_matrix(H_aae_cam2obj) aae_trans.append(translation) aae_rot.append(rotation) translation = H_refined_cam2obj[:3, 3] rotation = tf_trans.quaternion_from_matrix(H_refined_cam2obj) icp_trans.append(translation) icp_rot.append(rotation) # print("=> ICP \t {}".format(time.time()-start_time)) # 5. calculate reprojection error (VSD) and publish the detection3D pe_obj_ids, pe_masks, pe_depth = self.reproject_pe(self.pose_aae_vis_pub, all_pose_estimates, is_obj_ids, boxes, scores, rgb_img) vsd_errors = self.get_vsd_error(self.vis_vsd_costmap, is_masks, is_obj_ids, depth, pe_masks, pe_obj_ids, pe_depth) cou_errors = self.get_cou_error(self.vis_cou_costmap, is_masks, is_obj_ids, depth, pe_masks, pe_obj_ids, pe_depth) reproj_errors = np.mean(np.vstack([vsd_errors, cou_errors]), axis=0) aae_detections_array = Detection3DArray() aae_detections_array.header = camera_header icp_detections_array = Detection3DArray() icp_detections_array.header = camera_header # print("=> Get reprojErr \t {}".format(time.time()-start_time)) for i, is_obj_id in enumerate(is_obj_ids): class_id = self.pe_class_names.index(self.is_class_names[is_obj_id]) aae_pose_msg, aae_detection = self.gather_pose_results(camera_header, class_id, aae_trans[i], aae_rot[i], reproj_errors[i]) icp_pose_msg, icp_detection = self.gather_pose_results(camera_header, class_id, icp_trans[i], icp_rot[i], reproj_errors[i]) aae_detections_array.detections.append(aae_detection) icp_detections_array.detections.append(icp_detection) self.publish_vis_is(rgb_img, is_masks, boxes, is_obj_ids, scores) self.aae_detections_pub.publish(aae_detections_array) self.icp_detections_pub.publish(icp_detections_array) self.publish_markers(self.aae_markers_pub, aae_detections_array, [13, 255, 128]) self.publish_markers(self.icp_markers_pub, icp_detections_array, [128, 128, 255])
def filter_gmphd(self, detections_msg): # Check when the last detection message arrived if self.last_detections_msg is None: self.last_detections_msg = detections_msg rospy.loginfo('GMPHD filter: last detection message initialized') return detections_msg # Compute time difference (dt) from last message timestamp last_detection_time = self.last_detections_msg.header.stamp.to_sec() this_detection_time = detections_msg.header.stamp.to_sec() dt = this_detection_time - last_detection_time self.last_detections_msg = detections_msg # Set dt for those GM-PHD filters that have it (e.g. EK-PHD) if hasattr(self.gmphd, 'dt'): self.gmphd.dt = dt # Update process noise covariance matrix (which depends on dt) self.gmphd.Q = self._compute_process_noise_covariance(dt) # Generate birth components for PHD filter birth_components = [] for detection in detections_msg.detections: birth_component = deepcopy(self.gmphd.birth_component) p = detection.bbox.center.position birth_component.weight = self.config['birth_weight'] birth_component.mean[0] = p.x birth_component.mean[1] = p.y if self.dimensions == 3: birth_component.mean[2] = p.z birth_components.append(birth_component) self.gmphd.birth_components = birth_components # Convert observations from cartesian into spherical coordinates observations = [] for detection in detections_msg.detections: p = detection.bbox.center.position observation = cartesian2polar([p.x, p.y]) if self.dimensions == 2 \ else cartesian2spherical([p.x, p.y, p.z]) observations.append(observation) observations = np.array(observations) # Prepare control inputs (estimated velocity) control_inputs = np.zeros(self.gmphd.dim_x) if self.velocity is not None: control_inputs[:self.dimensions] = self.velocity[:self.dimensions] control_inputs = control_inputs[..., np.newaxis] # Update filter self.gmphd.filter(observations, control_inputs) # Prune self.gmphd.prune(trunc_thresh=self.config['trunc_thresh'], merge_thresh=self.config['merge_thresh'], max_components=self.config['max_components']) # Get frame_id for drone body frame: 'drone_N/camera_array' -> 'drone_N' frame_id = detections_msg.header.frame_id.split('/')[0] + '/base_link' # Publishes the biggest component as PoseWithCovariance (for visualization only!) if len(self.gmphd.components) > 0: comp = self.gmphd.components[0] pose_cov_msg = PoseWithCovarianceStamped() pose_cov_msg.header.frame_id = frame_id pose_cov_msg.header.stamp = rospy.Time.now() # Use mean as position pose_cov_msg.pose.pose.position.x = comp.mean[0] pose_cov_msg.pose.pose.position.y = comp.mean[1] pose_cov_msg.pose.pose.position.z = comp.mean[2] # Prepare covariance matrix covariance = np.zeros((6, 6)) # 6x6 row-major matrix dimz = self.gmphd.dim_z covariance[:dimz, : dimz] = comp.cov[:dimz, : dimz] # Need only position covariance pose_cov_msg.pose.covariance = list(covariance.flatten()) self.pose_cov_pub.publish(pose_cov_msg) filtered_detections = Detection3DArray() filtered_detections.header.frame_id = frame_id filtered_detections.header.stamp = rospy.Time.now() for comp in self.gmphd.components: # Only report components above weight threshold as detections if comp.weight < self.config['weight_thresh']: continue detection = Detection3D() detection.header = detections_msg.header detection.bbox.center.position.x = comp.mean[0] detection.bbox.center.position.y = comp.mean[1] detection.bbox.size.x = 2 * np.sqrt(comp.cov[0, 0]) detection.bbox.size.y = 2 * np.sqrt(comp.cov[1, 1]) detection.bbox.size.z = 0.1 # rviz complains about scale 0 otherwise if self.dimensions == 3: detection.bbox.center.position.z = comp.mean[2] detection.bbox.size.z = 2 * np.sqrt(comp.cov[2, 2]) filtered_detections.detections.append(detection) return filtered_detections
def detections_callback(self, detections_msg): rospy.loginfo_once('Got detections') out_detections_msg = Detection3DArray() out_detections_msg.header.stamp = rospy.Time.now() out_detections_msg.header.frame_id = detections_msg.header.frame_id for detection in detections_msg.detections: # Get localizer based on detection frame_id frame_id = detection.header.frame_id # drone_X/camera_X_optical camera_ns = frame_id.split('/')[-1].replace('_optical', '') # camera_X localizer = self.localizers[camera_ns] # Calculate unit-norm bearing from bounding box and physical object size bbox_center = (detection.bbox.center.x, detection.bbox.center.y) bbox_size = (detection.bbox.size_x, detection.bbox.size_y) object_width = self.config['object_width'] object_height = self.config['object_height'] object_depth = self.config['object_depth'] object_size = (object_width, object_height, object_depth) bearing = localizer.detection_to_bearing(bbox_center, bbox_size, object_size) # Transform bearing vector from camera/optical frame to body/base_link frame source_frame = detection.header.frame_id # drone_X/camera_X_optical target_frame = source_frame.split('/')[0] + '/base_link' try: transform = self.buffer.lookup_transform( target_frame=target_frame, source_frame=source_frame, time=rospy.Time(0)) except Exception as e: print(e) continue point = PointStamped() point.header.frame_id = source_frame point.header.stamp = rospy.Time.now() point.point.x = bearing[0] point.point.y = bearing[1] point.point.z = bearing[2] point_tf = tf2_geometry_msgs.do_transform_point(point, transform) out_detection = Detection3D() out_detection.header = point_tf.header out_detection.bbox.center.position.x = point_tf.point.x out_detection.bbox.center.position.y = point_tf.point.y out_detection.bbox.center.position.z = point_tf.point.z out_detection.bbox.size.x = object_depth out_detection.bbox.size.y = object_width out_detection.bbox.size.z = object_height out_detections_msg.detections.append(out_detection) self.detections_pub.publish(out_detections_msg)
def image_callback(self, image_msg, category_names_to_id): """Image callback""" img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8") # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # for debugging # Update camera matrix and distortion coefficients if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() detection_array.header = image_msg.header for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() pose_msg.header = image_msg.header CONVERT_SCALE_CM_TO_METERS = 100 pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] # Publish self.pubs[m].publish(pose_msg) self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[..., ::-1], "bgr8" ) ) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations
def run_on_image(self, image_filename, category_names_to_id, output_image_filepath): camera_info = self.camera_info if not self.info_manager.isCalibrated(): rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!') img = cv2.imread(image_filename) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Update camera matrix and distortion coefficients # camera_matrix = np.matrix(camera_info.K, dtype='float64') # camera_matrix.resize((3, 3)) # camera_matrix = np.array([[768.16058349609375, 0, 480], # [0, 768.16058349609375, 270], # [0, 0, 1]]) # dist_coeffs = np.matrix(camera_info.D, dtype='float64') # dist_coeffs.resize((len(camera_info.D), 1)) if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() # detection_array.header = image_msg.header detection_array.header = "camera" annotations = [] for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) print("results : {}".format(results)) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation # transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) transformed_ori = quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() # pose_msg.header = image_msg.header pose_msg.header.stamp = rospy.Time.now() pose_msg.header.frame_id = "camera" CONVERT_SCALE_CM_TO_METERS = 100 pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] annotations.append({ 'location' : loc, 'quaternion_xyzw' : transformed_ori.tolist(), 'category_id' : category_names_to_id[m], 'id' : i_r }) # Publish self.pubs[m].publish(pose_msg) # self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid final_im = np.array(im)[..., ::-1] self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( final_im, "bgr8" ) ) cv2.imwrite(output_image_filepath, np.array(im)[..., ::-1]) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations
def run_on_image_icp(self, image_filename, category_names_to_id, cloud_in, output_image_filepath, publish_cloud=True): """Image callback""" camera_info = self.camera_info if not self.info_manager.isCalibrated(): rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!') img = cv2.imread(image_filename) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Update camera matrix and distortion coefficients if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() detection_array.header = "camera" annotations = [] start_time = time.time() for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue print(result) loc = result["location"] ori = result["quaternion"] CONVERT_SCALE_CM_TO_METERS = 100 pose_frame = "camera" rospy.logwarn("Doing ICP for result : {}, {}".format(i_r, result["name"])) loc_scale = np.array([loc[0] / 100, loc[1] / 100, loc[2] / 100]) R = tf.transformations.quaternion_matrix(ori) T = tf.transformations.translation_matrix(loc_scale) total_transform = tf.transformations.concatenate_matrices(T, R) cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform) cloud_color = np.zeros(cloud_filtered_array.shape[0]) ros_msg = self.xyzrgb_array_to_pointcloud2( cloud_filtered_array, cloud_color, rospy.Time.now(), "camera" ) # self.pub_pose_cloud.publish(ros_msg) # rospy.logwarn("Num points after downsample and filter : {}".format(cloud_filtered_array.shape[0])) cloud_pose = pcl.PointCloud() cloud_pose.from_array(cloud_filtered_array) icp = cloud_pose.make_GeneralizedIterativeClosestPoint() converged, transf, estimate, fitness = icp.gicp(cloud_pose, cloud_in, max_iter=25) print('has converged:' + str(converged) + ' score: ' + str(fitness)) print(str(transf)) total_transform_icp = tf.transformations.concatenate_matrices(transf, total_transform) print(str(total_transform_icp)) loc = tf.transformations.translation_from_matrix(total_transform_icp) * 100 ori = tf.transformations.quaternion_from_matrix(total_transform_icp) pose_frame = "camera" if publish_cloud : cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform_icp) cloud_color = np.zeros(cloud_filtered_array.shape[0]) ros_msg = self.xyzrgb_array_to_pointcloud2( cloud_filtered_array, cloud_color, rospy.Time.now(), "camera" ) self.pub_pose_cloud.publish(ros_msg) # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() # pose_msg.header = image_msg.header pose_msg.header.stamp = rospy.Time.now() pose_msg.header.frame_id = pose_frame pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] annotations.append({ 'location' : loc, 'quaternion_xyzw' : transformed_ori.tolist(), 'category_id' : category_names_to_id[m], 'id' : i_r }) # Publish self.pubs[m].publish(pose_msg) self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points'] and publish_cloud: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid runtime = (time.time() - start_time) self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[..., ::-1], "bgr8" ) ) final_im = np.array(im)[..., ::-1] cv2.imwrite(output_image_filepath, np.array(im)[..., ::-1]) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations, runtime
def run_dope_node(params, freq=5): '''Starts ROS node to listen to image topic, run DOPE, and publish DOPE results''' global g_img global g_draw pubs = {} models = {} pnp_solvers = {} pub_dimension = {} draw_colors = {} class_ids = {} model_transforms = {} dimensions = {} mesh_scales = {} meshes = {} # Initialize parameters matrix_camera = np.zeros((3,3)) matrix_camera[0,0] = params["camera_settings"]['fx'] matrix_camera[1,1] = params["camera_settings"]['fy'] matrix_camera[0,2] = params["camera_settings"]['cx'] matrix_camera[1,2] = params["camera_settings"]['cy'] matrix_camera[2,2] = 1 dist_coeffs = np.zeros((4,1)) if "dist_coeffs" in params["camera_settings"]: dist_coeffs = np.array(params["camera_settings"]['dist_coeffs']) config_detect = lambda: None config_detect.mask_edges = 1 config_detect.mask_faces = 1 config_detect.vertex = 1 config_detect.threshold = 0.5 config_detect.softmax = 1000 config_detect.thresh_angle = params['thresh_angle'] config_detect.thresh_map = params['thresh_map'] config_detect.sigma = params['sigma'] config_detect.thresh_points = params["thresh_points"] # For each object to detect, load network model, create PNP solver, and start ROS publishers for model in params['weights']: models[model] =\ ModelData( model, g_path2package + "/weights/" + params['weights'][model] ) models[model].load_net_model() mesh_scales[model] = 1.0 model_transforms[model] = np.array([0.0, 0.0, 0.0, 1.0], dtype='float64') draw_colors[model] = \ tuple(params["draw_colors"][model]) class_ids[model] = \ (params["class_ids"][model]) dimensions[model] = tuple(params["dimensions"][model]) pnp_solvers[model] = \ CuboidPNPSolver( model, matrix_camera, Cuboid3d(params['dimensions'][model]), dist_coeffs=dist_coeffs ) pubs[model] = \ rospy.Publisher( '{}/pose_{}'.format(params['topic_publishing'], model), PoseStamped, queue_size=10 ) pub_dimension[model] = \ rospy.Publisher( '{}/dimension_{}'.format(params['topic_publishing'], model), String, queue_size=10 ) # Start ROS publisher pub_rgb_dope_points = \ rospy.Publisher( params['topic_publishing']+"/rgb_points", ImageSensor_msg, queue_size=10 ) pub_detections = \ rospy.Publisher( 'detected_objects', Detection3DArray, queue_size=10 ) pub_markers = \ rospy.Publisher( 'markers', MarkerArray, queue_size=10 ) # Starts ROS listener rospy.Subscriber( topic_cam, ImageSensor_msg, __image_callback ) # Initialize ROS node rospy.init_node('dope_vis', anonymous=True) rate = rospy.Rate(freq) print ("Running DOPE... (Listening to camera topic: '{}')".format(topic_cam)) print ("Ctrl-C to stop") while not rospy.is_shutdown(): if g_img is not None: # Copy and draw image img_copy = g_img.copy() im = Image.fromarray(img_copy) g_draw = ImageDraw.Draw(im) detection_array = Detection3DArray() detection_array.header = image_msg.header for m in models: # Detect object results = ObjectDetector.detect_object_in_image( models[m].net, pnp_solvers[m], g_img, config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=dimensions[m], quaternion=model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) msg = PoseStamped() msg.header.frame_id = params["frame_id"] msg.header.stamp = rospy.Time.now() CONVERT_SCALE_CM_TO_METERS = 100 msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS msg.pose.orientation.x = ori[0] msg.pose.orientation.y = ori[1] msg.pose.orientation.z = ori[2] msg.pose.orientation.w = ori[3] # Publish pubs[m].publish(msg) pub_dimension[m].publish(str(params['dimensions'][m])) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = msg.pose detection.results.append(hypothesis) detection.bbox.center = msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) DrawCube(points2d, draw_colors[m]) # Publish the image with results overlaid pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[...,::-1], "bgr8" ) ) # Delete all existing markers markers = MarkerArray() marker = Marker() marker.action = Marker.DELETEALL markers.markers.append(marker) pub_markers.publish(markers) # Object markers class_id_to_name = {class_id: name for name, class_id in class_ids.iteritems()} markers = MarkerArray() for i, det in enumerate(detection_array.detections): name = class_id_to_name[det.results[0].id] color = draw_colors[name] # cube marker marker = Marker() marker.header = detection_array.header marker.action = Marker.ADD marker.pose = det.bbox.center marker.color.r = color[0] / 255.0 marker.color.g = color[1] / 255.0 marker.color.b = color[2] / 255.0 marker.color.a = 0.4 marker.ns = "bboxes" marker.id = i marker.type = Marker.CUBE marker.scale = det.bbox.size markers.markers.append(marker) pub_markers.publish(markers) pub_detections.publish(detection_array) rate.sleep()