def _construct_detection_msg_and_update_detection_image( self, detection_res, channel_id, stamp): if channel_id == Side.PORT: multiplier = -1 else: multiplier = 1 detection_array_msg = Detection2DArray() detection_array_msg.header.frame_id = self.pub_frame_id detection_array_msg.header.stamp = stamp for object_id, detection in detection_res.items(): detection_msg = Detection2D() detection_msg.header = detection_array_msg.header object_hypothesis = ObjectHypothesisWithPose() object_hypothesis.id = object_id.value object_hypothesis.score = detection['confidence'] object_hypothesis.pose.pose = self._detection_to_pose( detection['pos'], channel_id) # Filter out object detection outliers if abs(object_hypothesis.pose.pose.position.y) > self.water_depth: continue else: pos = self.channel_size + multiplier * detection['pos'] self.detection_image[ 0, max(pos - 10, 0):min(pos + 10, self.channel_size * 2), :] = self.detection_colors[object_id] detection_msg.results.append(object_hypothesis) detection_array_msg.detections.append(detection_msg) return detection_array_msg
def callback(self, ros_data): '''Callback function of subscribed topic. ''' if ros_data.header.stamp < self.latest_finish_time: return #### direct conversion to CV2 #### now = rospy.Time.now() np_arr = np.fromstring(ros_data.data, np.uint8) image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) # dim = (int(len(image_np / 20)), int(len(image_np[0]) / 20)) # cv2.resize(image_np, dim) # image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) # OpenCV >= 3.0: results = detect(self.net, self.meta, image_np) detections = Detection2DArray() for result in results: detection = Detection2D() detection.header = ros_data.header res = ObjectHypothesisWithPose() res.id = self.get_id(result[0]) res.score = result[1] detection.results.append(res) detection.bbox.size_x = result[2][2] detection.bbox.size_y = result[2][3] detection.bbox.center.x = result[2][0] detection.bbox.center.y = result[2][1] detections.detections.append(detection) self.res_pub.publish(detections) rospy.loginfo_throttle( 1, 'Took yolo %s to process image' % ((rospy.Time.now() - now).to_sec())) self.latest_finish_time = rospy.Time.now()
def listener_callback(self, data): self.get_logger().info("Received an image! ") try: cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") except CvBridgeError as e: print(e) image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB) self.timer.start() boxes, labels, probs = self.predictor.predict(image, 10, 0.4) interval = self.timer.end() print('Time: {:.2f}s, Detect Objects: {:d}.'.format( interval, labels.size(0))) detection_array = Detection2DArray() for i in range(boxes.size(0)): box = boxes[i, :] label = f"{self.class_names[labels[i]]}: {probs[i]:.2f}" print("Object: " + str(i) + " " + label) cv2.rectangle(cv_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) # Definition of 2D array message and ading all object stored in it. object_hypothesis_with_pose = ObjectHypothesisWithPose() object_hypothesis_with_pose.id = str(self.class_names[labels[i]]) object_hypothesis_with_pose.score = float(probs[i]) bounding_box = BoundingBox2D() bounding_box.center.x = float((box[0] + box[2]) / 2) bounding_box.center.y = float((box[1] + box[3]) / 2) bounding_box.center.theta = 0.0 bounding_box.size_x = float(2 * (bounding_box.center.x - box[0])) bounding_box.size_y = float(2 * (bounding_box.center.y - box[1])) detection = Detection2D() detection.header = data.header detection.results.append(object_hypothesis_with_pose) detection.bbox = bounding_box detection_array.header = data.header detection_array.detections.append(detection) cv2.putText( cv_image, label, (box[0] + 20, box[1] + 40), cv2.FONT_HERSHEY_SIMPLEX, 1, # font scale (255, 0, 255), 2) # line type # Displaying the predictions cv2.imshow('object_detection', cv_image) # Publishing the results onto the the Detection2DArray vision_msgs format self.detection_publisher.publish(detection_array) ros_image = self.bridge.cv2_to_imgmsg(cv_image) ros_image.header.frame_id = 'camera_frame' self.result_publisher.publish(ros_image) cv2.waitKey(1)
def object_predict(self, object_data, header, image_np, image, offset_x, offset_y): image_height, image_width, channels = image.shape obj = Detection2D() obj_hypothesis = ObjectHypothesisWithPose() object_id = object_data[0] object_score = object_data[1] dimensions = object_data[2] # TODO - Need to make it so that the size and offset of the bounding box is corrected to the original # 320 x 240 image. Center x and center y need to be tested, while size x and size y need to be fixed obj.header = header obj_hypothesis.id = object_id obj_hypothesis.score = object_score obj.results.append(obj_hypothesis) obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height) obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width) obj.bbox.center.x = int( (dimensions[1] + dimensions[3]) * image_height / 2) + offset_x #do visualize this later obj.bbox.center.y = int((dimensions[0] + dimensions[2]) * image_width / 2) + offset_y #do visualize this later return obj
def object_predict(self, object_data, header, image_np, image): image_height, image_width, channels = image.shape obj = Detection2D() obj_hypothesis = ObjectHypothesisWithPose() object_id = object_data[0] object_score = object_data[1] dimensions = object_data[2] obj.header = header obj_hypothesis.id = object_id obj_hypothesis.score = object_score obj.results.append(obj_hypothesis) obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height) obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width) obj.bbox.center.x = int( (dimensions[1] + dimensions[3]) * image_height / 2) obj.bbox.center.y = int( (dimensions[0] + dimensions[2]) * image_width / 2) # rospy.loginfo("publish bbox.size x: %d", obj.bbox.size_x) # rospy.loginfo("publish bbox.size y: %d", obj.bbox.size_y) # rospy.loginfo("publish bbox.center x: %d", obj.bbox.center.x) # rospy.loginfo("publish bbox.center y: %d", obj.bbox.center.y) return obj
def object_predict(self, object_data, header, image_np, image): image_height, image_width, channels = image.shape obj = Detection2D() obj_hypothesis = ObjectHypothesisWithPose() object_id = object_data[0] object_score = object_data[1] dimensions = object_data[2] obj.header = header obj_hypothesis.id = object_id obj_hypothesis.score = object_score obj.results.append(obj_hypothesis) obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height) obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width) obj.bbox.center.x = int( (dimensions[1] + dimensions[3]) * image_height / 2) obj.bbox.center.y = int( (dimensions[0] + dimensions[2]) * image_width / 2) #print(str(obj.bbox)) # print('height:'+str(image_height)) # print('width: '+str(image_width)) return obj
def callback(self): image = rospy.wait_for_message( "/zed2/zed_node/left/image_rect_color/compressed", CompressedImage) time1 = rospy.Time.now().to_sec() self.timer.header = image.header self.timer.header.frame_id = "zed2_left_camera_frame" self.timer.time_ref = rospy.Time.now() self.timer_pub.publish(self.timer) cv_image = self.bridge.compressed_imgmsg_to_cv2(image, "bgr8") _, bboxes = detect_image(self.yolo, cv_image, "", input_size=YOLO_INPUT_SIZE, show=False, CLASSES=TRAIN_CLASSES, score_threshold=0.3, iou_threshold=0.1, rectangle_colors=(255, 0, 0)) detect = Detection2DArray() detect.header = image.header for Object in bboxes: detection = Detection2D() hypo = ObjectHypothesisWithPose() #Start x x1 = Object[0] #End x x2 = Object[2] #Start y y1 = Object[1] #end y y2 = Object[3] #Size x Sx = x2 - x1 #Size y Sy = y2 - y1 #Center x Cx = x1 + Sx / 2 #Center y Cy = y1 + Sy / 2 detection.bbox.center.x = Cx detection.bbox.center.y = Cy detection.bbox.size_x = Sx detection.bbox.size_y = Sy hypo.id = int(Object[5]) hypo.score = Object[4] detection.results = [ hypo, ] detection.is_tracking = False detect.detections.append(detection) self.boxes_pub.publish(detect) self.callback()
def detect(self, image): # Convert to grayscale if needed if image.ndim == 3: image = cv.cvtColor(image, cv.COLOR_BGR2GRAY) image_height, image_width = image.shape image_area = image_height * image_width # Apply (inverse) binary threshold to input image mask = cv.threshold(image, THRESHOLD, THRESHOLD_MAX, cv.THRESH_BINARY_INV)[1] # Dilate mask to find more reliable contours # kernel = np.ones((5, 5), np.uint8) # mask_dilated = cv.dilate(mask, kernel, iterations=1) # Find external approximate contours in dilated mask contours, hierarchy = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE) # Filter out contours that don't qualify as a detection detections = [] for contour in contours: # Filer out if the contour touches the image border x, y, w, h = cv.boundingRect(contour) if x == 0 or y == 0 or x + w == image_width or y + h == image_height: continue # Filter out if the contour is too small if cv.contourArea(contour) < 1e-4 * image_area: continue detections.append((x, y, w, h)) # Fill detections msg detection_array_msg = Detection2DArray() for detection in detections: x, y, w, h = detection center_x = x + w / 2. center_y = y + h / 2. bbox = BoundingBox2D() bbox.center = Pose2D(x=center_x, y=center_y, theta=0) bbox.size_x = w bbox.size_y = h object_hypothesis = ObjectHypothesisWithPose() object_hypothesis.id = 0 object_hypothesis.score = 1.0 detection_msg = Detection2D() detection_msg.bbox = bbox detection_msg.results.append(object_hypothesis) detection_array_msg.detections.append(detection_msg) return detection_array_msg
def _handle_yolo_detect(self, req): cv_image = None detection_array = Detection2DArray() detections = [] boxes = None try: cv_image = self.bridge.imgmsg_to_cv2(req.image, "bgr8") except CvBridgeError as e: rospy.logerr(e) try: boxes = self.yolo.predict(cv_image) except SystemError: pass # rospy.loginfo('Found {} boxes'.format(len(boxes))) for box in boxes: detection = Detection2D() results = [] bbox = BoundingBox2D() center = Pose2D() detection.header = Header() detection.header.stamp = rospy.get_rostime() # detection.source_img = deepcopy(req.image) labels = box.get_all_labels() for i in range(0,len(labels)): object_hypothesis = ObjectHypothesisWithPose() object_hypothesis.id = i object_hypothesis.score = labels[i] results.append(object_hypothesis) detection.results = results x, y = box.get_xy_center() center.x = x center.y = y center.theta = 0.0 bbox.center = center size_x, size_y = box.get_xy_extents() bbox.size_x = size_x bbox.size_y = size_y detection.bbox = bbox detections.append(detection) detection_array.header = Header() detection_array.header.stamp = rospy.get_rostime() detection_array.detections = detections return YoloDetectResponse(detection_array)
def _publish_marker_detection(self, marker, cos_sim): """Publish detected marker""" distance = self._get_distance_to_marker(marker) object_hypothesis = ObjectHypothesisWithPose() object_hypothesis.id = 1 # the higher the cos_sim (further away from 90 degree angle between current_pose # and the marker), the lower the score object_hypothesis.score = (-cos_sim + (self.buoy_radius * 2)) / ( self.buoy_radius * 2) marker_transformed = self._transform_pose( marker, from_frame=marker.header.frame_id) object_hypothesis.pose.pose = marker_transformed.pose # Add random noise to pose of object object_hypothesis.pose.pose.position.x += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.position.y += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.position.z += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.orientation.x += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.orientation.y += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.orientation.z += np.random.randn( ) * self.noise_sigma object_hypothesis.pose.pose.orientation.w += np.random.randn( ) * self.noise_sigma # Wrap ObjectHypothesisWithPose msg into Detection2D msg detection_msg = Detection2D() detection_msg.header.frame_id = self.published_frame_id detection_msg.header.stamp = rospy.Time.now() detection_msg.results.append(object_hypothesis) # Wrap Detection2D msg into Detection2DArray msg detection_array_msg = Detection2DArray() detection_array_msg.header = detection_msg.header detection_array_msg.detections.append(detection_msg) self.pub.publish(detection_array_msg) # Publish detection as a Marker for visualization in rviz detected_marker = copy.deepcopy(marker) detected_marker.header.stamp = rospy.Time.now() detected_marker.ns = 'detected_{}'.format(detected_marker.ns) detected_marker.color = ColorRGBA(0, 1, 0, 1) detected_marker.lifetime.secs = 1 self.pub_detected_markers.publish(detected_marker)
def callback_yolo(self, image): image = self.image cv_image = self.bridge.compressed_imgmsg_to_cv2(image, "bgr8") _, bboxes = detect_image(self.yolo, cv_image, "", input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255, 0, 0)) detect = Detection2DArray() detect.header = image.header for Object in bboxes: detection = Detection2D() hypo = ObjectHypothesisWithPose() #Start x x1 = Object[0] #End x x2 = Object[2] #Start y y1 = Object[1] #end y y2 = Object[3] #Size x Sx = x2 - x1 #Size y Sy = y2 - y1 #Center x Cx = x1 + Sx / 2 #Center y Cy = y1 + Sy / 2 detection.bbox.center.x = Cx detection.bbox.center.y = Cy detection.bbox.size_x = Sx detection.bbox.size_y = Sy hypo.id = int(Object[5]) hypo.score = Object[4] detection.results = [ hypo, ] detect.detections.append(detection) self.bbox_pub.publish(detect)
def create_detections_msg(self, image_np, output_dict): img_height = image_np.shape[0] img_width = image_np.shape[1] boxes = output_dict['detection_boxes'] classes = output_dict['detection_classes'] scores = output_dict['detection_scores'] detections = Detection2DArray() detections.header.stamp = self.get_clock().now().to_msg() detections.detections = [] for i in range(len(boxes)): det = Detection2D() det.header = detections.header det.results = [] detected_object = ObjectHypothesisWithPose() detected_object.id = classes[i].item() detected_object.score = scores[i].item() det.results.append(detected_object) # box is ymin, xmin, ymax, xmax in normalized coordinates box = boxes[i] det.bbox.size_y = (box[2] - box[0]) * img_height det.bbox.size_x = (box[3] - box[1]) * img_width det.bbox.center.x = (box[1] + box[3]) * img_height / 2 det.bbox.center.y = (box[0] + box[2]) * img_width / 2 if (self.republish_image): box_img = image_np[int(box[0] * img_height):int(box[2] * img_height), int(box[1] * img_width):int(box[3] * img_width)] det.source_img = img_utils.image_np_to_image_msg(box_img) detections.detections.append(det) return detections
def detect_objects(self, data): """ This function detects and classifies the objects in the image provided through a Service Request by running on her the provided detection model. Returns a vision_msgs/Detection2DArray, for which each detection is populated only with the bbox and results fields. Moreover, for what it concerns the results field, each result is populated only with the id and score fields. All the other fields are not significant for this application, so they have been ignored. Args: data (sensor_msgs/Image): image to perform object detection on. Returns: pepper_object_detectionResponse: response encapsulating data regarding detected objects, structured as in service definition. """ # Convert image from sensor_msgs/Image to numpy array img_np = ros_numpy.numpify(data.img) rospy.loginfo('Object detection server computing predictions...') detections = self._detection_model(img_np) rospy.loginfo('Predictions computed!') message = Detection2DArray() # Insert all the predictions into the message and return them for class_id, score, box in zip(detections['detection_classes'], detections['detection_scores'], detections['detection_boxes']): detection = Detection2D() detection.bbox.size_x = box[3] - box[1] detection.bbox.size_y = box[2] - box[0] detection.bbox.center.x = box[1] + detection.bbox.size_x / 2 detection.bbox.center.y = box[0] + detection.bbox.size_y / 2 detected_object = ObjectHypothesisWithPose() detected_object.score = score detected_object.id = class_id detection.results.append(detected_object) message.detections.append(detection) # Create a response object response = pepper_object_detectionResponse() response.detections = message return response
def publish_markers(self, fid_data_array): fidarray = FiducialTransformArray() fidarray.header.stamp = rospy.Time.now() vis = Detection2DArray() vis.header.stamp = rospy.Time.now() for fid in fid_data_array: if VIS_MSGS: obj = Detection2D() oh = ObjectHypothesisWithPose() oh.id = fid.id oh.pose.pose.position.x = fid.translation.x oh.pose.pose.position.y = fid.translation.y oh.pose.pose.position.z = fid.translation.z oh.pose.pose.orientation.w = fid.rotation.w oh.pose.pose.orientation.x = fid.rotation.x oh.pose.pose.orientation.y = fid.rotation.y oh.pose.pose.orientation.z = fid.rotation.z oh.score = math.exp(-2 * OBJECT_ERROR) obj.results.append(oh) vis.detections.append(obj) else: data = FiducialTransform() data.fiducial_id = fid.id data.transform.translation = fid.translation data.transform.rotation = fid.rotation data.image_error = IMAGE_ERROR data.object_error = OBJECT_ERROR data.fiducial_area = FIDUCIAL_AREA fidarray.transforms.append(data) if VIS_MSGS: self.fid_pub.publish(vis) else: self.fid_pub.publish(fidarray)
def object_predict(self, object_data, header, image_np, image, ID): image_height, image_width, channels = image.shape obj = Detection2D() obj_hypothesis = ObjectHypothesisWithPose() object_id = ID object_score = object_data[1] dimensions = object_data[2] obj.header = header obj_hypothesis.id = object_id obj_hypothesis.score = object_score obj.results.append(obj_hypothesis) obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height) obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width) #the center point of x coordinate is determined here... obj.bbox.center.x = int( (dimensions[1] + dimensions[3]) * image_width / 2) #the center point of y coordinate is determined here... obj.bbox.center.y = int( (dimensions[0] + dimensions[2]) * image_height / 2) #detection results are returned as type Detection2D to objArray... return obj
def gather_pose_results(self, header, class_id, translation, rotation, score): pose_msg = PoseStamped() pose_msg.header = header pose_msg.pose.position.x = translation[0] pose_msg.pose.position.y = translation[1] pose_msg.pose.position.z = translation[2] pose_msg.pose.orientation.x = rotation[0] pose_msg.pose.orientation.y = rotation[1] pose_msg.pose.orientation.z = rotation[2] pose_msg.pose.orientation.w = rotation[3] # Add to detection3Darray detections = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = class_id hypothesis.pose.pose = pose_msg.pose hypothesis.score = score detections.results.append(hypothesis) detections.bbox.center = pose_msg.pose detections.bbox.size.x = self.dims[class_id][0] * 2 detections.bbox.size.y = self.dims[class_id][1] * 2 detections.bbox.size.z = self.dims[class_id][2] * 2 return pose_msg, detections
def detect(img, camera_frame_id, sensor_image): time1 = time.time() global ros_image cudnn.benchmark = True dataset = loadimg(img) # print(dataset[3]) #plt.imshow(dataset[2][:, :, ::-1]) names = model.module.names if hasattr(model, 'module') else model.names #colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] #colors=[[0,255,0]] augment = 'store_true' conf_thres = 0.3 iou_thres = 0.45 classes = (0, 1, 2, 3, 5, 7) agnostic_nms = 'store_true' img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once path = dataset[0] img = dataset[1] im0s = dataset[2] vid_cap = dataset[3] img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 time2 = time.time() if img.ndimension() == 3: img = img.unsqueeze(0) # Inference pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) view_img = 1 save_txt = 1 save_conf = 'store_true' time3 = time.time() waste_detections = Detection2DArray() waste_detections.header = Header() waste_detections.header.stamp = rospy.get_rostime() waste_detections.header.frame_id = camera_frame_id for i, det in enumerate(pred): # detections per image det_obj = Detection2D() det_obj.header = waste_detections.header # det_obj.source_img = sensor_image result = ObjectHypothesisWithPose() p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None: #print(det) # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, conf, *xywh) if save_conf else (cls, *xywh) # label format det_obj.bbox.center.x = xywh[0] + xywh[2] / 2 det_obj.bbox.center.y = xywh[1] + xywh[3] / 2 det_obj.bbox.size_x = xywh[2] det_obj.bbox.size_y = xywh[3] result.id = int(cls) result.score = conf if view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=[0, 255, 0], line_thickness=3) det_obj.results.append(result) waste_detections.detections.append(det_obj) time4 = time.time() print('************') print('2-1', time2 - time1) print('3-2', time3 - time2) print('4-3', time4 - time3) print('total', time4 - time1) out_img = im0[:, :, [2, 1, 0]] ros_image = out_img cv2.imshow('YOLOV5', out_img) a = cv2.waitKey(1) #### Create CompressedIamge #### if (waste_detections.detections.__sizeof__ != 0): det_pub.publish(waste_detections) publish_image(im0)
def callback(self, data): try: cv_image = self.bridge.imgmsg_to_cv2( data, desired_encoding="passthrough") except CvBridgeError as e: rospy.logerr(e) results = self.engine.DetectWithImage(PIL.Image.fromarray(cv_image), top_k=1, threshold=self.threshold, keep_aspect_ratio=True, relative_coord=True) detections = Detection2DArray() now = rospy.get_rostime() for detection in results: top_left, bottom_right = detection.bounding_box min_x, min_y = top_left max_x, max_y = bottom_right imheight, imwidth, _ = cv_image.shape min_x *= imwidth max_x *= imwidth min_y *= imheight max_y *= imheight centre_x = (max_x + min_x) / 2.0 centre_y = (max_y + min_y) / 2.0 height = max_y - min_y width = max_x - min_x if height <= 0 or width <= 0: continue bbox = BoundingBox2D() bbox.center.x = centre_x bbox.center.y = centre_y bbox.size_x = width bbox.size_y = height hypothesis = ObjectHypothesisWithPose() hypothesis.id = str(detection.label_id) hypothesis.score = detection.score hypothesis.pose.pose.position.x = centre_x hypothesis.pose.pose.position.y = centre_y # update the timestamp of the object object = Detection2D() object.header.stamp = now object.header.frame_id = data.header.frame_id object.results.append(hypothesis) object.bbox = bbox object.source_img.header.frame_id = data.header.frame_id object.source_img.header.stamp = now object.source_img.height = int(height) object.source_img.width = int(width) object.source_img.encoding = "rgb8" object.source_img.step = int(width * 3) object.source_img.data = cv_image[int(min_y):int(max_y), int(min_x):int(max_x)].tobytes() detections.detections.append(object) if len(results) > 0: self.detection_pub.publish(detections) rospy.logdebug("%.2f ms" % self.engine.get_inference_time())
def detect_multi(self, images): image_shape = images[0].shape images_list = [] for image in images: image = cv.resize(image, self.image_size[::-1]) # cv uses width x height! image = image[..., np.newaxis] if not self.grayscale and image.shape[-1] == 1: image = cv.cvtColor(image, cv.COLOR_GRAY2RGB) image_torch = self.transform(image)[np.newaxis, ...] images_list.append(image_torch) images_torch = torch.cat(images_list, 0) if self.half: images_torch = images_torch.half() if self.has_gpu and self.use_gpu: images_torch = images_torch.cuda() with torch.no_grad(): # Raw detection is a (N x 8190 x num_classes) tensor start_time_inf = self.time_synchronized() raw_detections, _ = self.model(images_torch) elapsed_time_inf = self.time_synchronized() - start_time_inf if self.verbose: print('time (inf): {:.4f}s'.format(elapsed_time_inf)) if self.half: raw_detections = raw_detections.float() start_time_nms = self.time_synchronized() detections_torch = self.non_max_suppression( raw_detections.float(), conf_thres=self.confidence_threshold, iou_thres=self.iou_threshold) elapsed_time_nms = self.time_synchronized() - start_time_nms if self.verbose: print('time (nms): {:.4f}s'.format(elapsed_time_nms)) if self.verbose: print('time (tot): {:.4f}s\n'.format(elapsed_time_inf + elapsed_time_nms)) detection_array_list = [] for detection_torch in detections_torch: detection_array_msg = Detection2DArray() if detection_torch is not None: # Rescale bounding boxes back to original (old) image size # shape_old: Shape of images as they come in # shape_new: Shape of images used for inference detection_torch = rescale_boxes(detection_torch, shape_old=image_shape[:2], shape_new=self.image_size) detections = detection_torch.cpu().numpy() for i, detection in enumerate(detections): x1, y1, x2, y2, object_conf, class_pred = detection class_id = int(class_pred) # class_name = self.classes[class_id] size_x = (x2 - x1) size_y = (y2 - y1) x = x1 + (size_x / 2.0) y = y1 + (size_y / 2.0) bbox = BoundingBox2D() bbox.center = Pose2D(x=x, y=y, theta=0) bbox.size_x = size_x bbox.size_y = size_y object_hypothesis = ObjectHypothesisWithPose() object_hypothesis.id = class_id object_hypothesis.score = object_conf detection_msg = Detection2D() detection_msg.bbox = bbox detection_msg.results.append(object_hypothesis) detection_array_msg.detections.append(detection_msg) detection_array_list.append(detection_array_msg) return detection_array_list
def run_on_image_icp(self, image_filename, category_names_to_id, cloud_in, output_image_filepath, publish_cloud=True): """Image callback""" camera_info = self.camera_info if not self.info_manager.isCalibrated(): rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!') img = cv2.imread(image_filename) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Update camera matrix and distortion coefficients if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() detection_array.header = "camera" annotations = [] start_time = time.time() for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue print(result) loc = result["location"] ori = result["quaternion"] CONVERT_SCALE_CM_TO_METERS = 100 pose_frame = "camera" rospy.logwarn("Doing ICP for result : {}, {}".format(i_r, result["name"])) loc_scale = np.array([loc[0] / 100, loc[1] / 100, loc[2] / 100]) R = tf.transformations.quaternion_matrix(ori) T = tf.transformations.translation_matrix(loc_scale) total_transform = tf.transformations.concatenate_matrices(T, R) cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform) cloud_color = np.zeros(cloud_filtered_array.shape[0]) ros_msg = self.xyzrgb_array_to_pointcloud2( cloud_filtered_array, cloud_color, rospy.Time.now(), "camera" ) # self.pub_pose_cloud.publish(ros_msg) # rospy.logwarn("Num points after downsample and filter : {}".format(cloud_filtered_array.shape[0])) cloud_pose = pcl.PointCloud() cloud_pose.from_array(cloud_filtered_array) icp = cloud_pose.make_GeneralizedIterativeClosestPoint() converged, transf, estimate, fitness = icp.gicp(cloud_pose, cloud_in, max_iter=25) print('has converged:' + str(converged) + ' score: ' + str(fitness)) print(str(transf)) total_transform_icp = tf.transformations.concatenate_matrices(transf, total_transform) print(str(total_transform_icp)) loc = tf.transformations.translation_from_matrix(total_transform_icp) * 100 ori = tf.transformations.quaternion_from_matrix(total_transform_icp) pose_frame = "camera" if publish_cloud : cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform_icp) cloud_color = np.zeros(cloud_filtered_array.shape[0]) ros_msg = self.xyzrgb_array_to_pointcloud2( cloud_filtered_array, cloud_color, rospy.Time.now(), "camera" ) self.pub_pose_cloud.publish(ros_msg) # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() # pose_msg.header = image_msg.header pose_msg.header.stamp = rospy.Time.now() pose_msg.header.frame_id = pose_frame pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] annotations.append({ 'location' : loc, 'quaternion_xyzw' : transformed_ori.tolist(), 'category_id' : category_names_to_id[m], 'id' : i_r }) # Publish self.pubs[m].publish(pose_msg) self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points'] and publish_cloud: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid runtime = (time.time() - start_time) self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[..., ::-1], "bgr8" ) ) final_im = np.array(im)[..., ::-1] cv2.imwrite(output_image_filepath, np.array(im)[..., ::-1]) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations, runtime
def step(self): stamp = super().step() if not stamp: return # Publish camera data if self._image_publisher.get_subscription_count( ) > 0 or self._always_publish: self._wb_device.enable(self._timestep) image = self._wb_device.getImage() if image is None: return # Image data msg = Image() msg.header.stamp = stamp msg.header.frame_id = self._frame_id msg.height = self._wb_device.getHeight() msg.width = self._wb_device.getWidth() msg.is_bigendian = False msg.step = self._wb_device.getWidth() * 4 # We pass `data` directly to we avoid using `data` setter. # Otherwise ROS2 converts data to `array.array` which slows down the simulation as it copies memory internally. # Both, `bytearray` and `array.array`, implement Python buffer protocol, so we should not see unpredictable # behavior. # deepcode ignore W0212: Avoid conversion from `bytearray` to `array.array`. msg._data = image msg.encoding = 'bgra8' self._image_publisher.publish(msg) self.__message_info.header.stamp = Time( seconds=self._node.robot.getTime()).to_msg() self._camera_info_publisher.publish(self.__message_info) if self._wb_device.hasRecognition( ) and (self._recognition_publisher.get_subscription_count() > 0 or self._recognition_webots_publisher.get_subscription_count() > 0): self._wb_device.recognitionEnable(self._timestep) objects = self._wb_device.getRecognitionObjects() if objects is None: return # Recognition data reco_msg = Detection2DArray() reco_msg_webots = WbCameraRecognitionObjects() reco_msg.header.stamp = stamp reco_msg_webots.header.stamp = stamp reco_msg.header.frame_id = self._frame_id reco_msg_webots.header.frame_id = self._frame_id for obj in objects: # Getting Object Info position = Point() orientation = Quaternion() position.x = obj.get_position()[0] position.y = obj.get_position()[1] position.z = obj.get_position()[2] axangle = obj.get_orientation() quat = axangle2quat(axangle[:-1], axangle[-1]) orientation.w = quat[0] orientation.x = quat[1] orientation.y = quat[2] orientation.z = quat[3] obj_model = obj.get_model().decode('UTF-8') obj_center = [ float(i) for i in obj.get_position_on_image() ] obj_size = [float(i) for i in obj.get_size_on_image()] obj_id = obj.get_id() obj_colors = obj.get_colors() # Object Info -> Detection2D reco_obj = Detection2D() hyp = ObjectHypothesisWithPose() hyp.id = obj_model hyp.pose.pose.position = position hyp.pose.pose.orientation = orientation reco_obj.results.append(hyp) reco_obj.bbox.center.x = obj_center[0] reco_obj.bbox.center.y = obj_center[1] reco_obj.bbox.size_x = obj_size[0] reco_obj.bbox.size_y = obj_size[1] reco_msg.detections.append(reco_obj) # Object Info -> WbCameraRecognitionObject reco_webots_obj = WbCameraRecognitionObject() reco_webots_obj.id = obj_id reco_webots_obj.model = obj_model reco_webots_obj.pose.pose.position = position reco_webots_obj.pose.pose.orientation = orientation reco_webots_obj.bbox.center.x = obj_center[0] reco_webots_obj.bbox.center.y = obj_center[1] reco_webots_obj.bbox.size_x = obj_size[0] reco_webots_obj.bbox.size_y = obj_size[1] for i in range(0, obj.get_number_of_colors()): color = ColorRGBA() color.r = obj_colors[3 * i] color.g = obj_colors[3 * i + 1] color.b = obj_colors[3 * i + 2] reco_webots_obj.colors.append(color) reco_msg_webots.objects.append(reco_webots_obj) self._recognition_webots_publisher.publish(reco_msg_webots) self._recognition_publisher.publish(reco_msg) else: self._wb_device.recognitionDisable() else: self._wb_device.disable()
def image_callback(self, image_msg, category_names_to_id): """Image callback""" img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8") # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # for debugging # Update camera matrix and distortion coefficients if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() detection_array.header = image_msg.header for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() pose_msg.header = image_msg.header CONVERT_SCALE_CM_TO_METERS = 100 pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] # Publish self.pubs[m].publish(pose_msg) self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[..., ::-1], "bgr8" ) ) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations
def run_on_image(self, image_filename, category_names_to_id, output_image_filepath): camera_info = self.camera_info if not self.info_manager.isCalibrated(): rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!') img = cv2.imread(image_filename) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Update camera matrix and distortion coefficients # camera_matrix = np.matrix(camera_info.K, dtype='float64') # camera_matrix.resize((3, 3)) # camera_matrix = np.array([[768.16058349609375, 0, 480], # [0, 768.16058349609375, 270], # [0, 0, 1]]) # dist_coeffs = np.matrix(camera_info.D, dtype='float64') # dist_coeffs.resize((len(camera_info.D), 1)) if self.input_is_rectified: P = np.matrix(camera_info.P, dtype='float64') P.resize((3, 4)) camera_matrix = P[:, :3] dist_coeffs = np.zeros((4, 1)) else: camera_matrix = np.matrix(camera_info.K, dtype='float64') camera_matrix.resize((3, 3)) dist_coeffs = np.matrix(camera_info.D, dtype='float64') dist_coeffs.resize((len(camera_info.D), 1)) # Downscale image if necessary height, width, _ = img.shape scaling_factor = float(self.downscale_height) / height if scaling_factor < 1.0: camera_matrix[:2] *= scaling_factor img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height))) for m in self.models: self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix) self.pnp_solvers[m].set_dist_coeffs(dist_coeffs) # Copy and draw image img_copy = img.copy() im = Image.fromarray(img_copy) draw = Draw(im) detection_array = Detection3DArray() # detection_array.header = image_msg.header detection_array.header = "camera" annotations = [] for m in self.models: # Detect object results = ObjectDetector.detect_object_in_image( self.models[m].net, self.pnp_solvers[m], img, self.config_detect ) print("results : {}".format(results)) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation # transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m]) transformed_ori = quaternion_multiply(ori, self.model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) pose_msg = PoseStamped() # pose_msg.header = image_msg.header pose_msg.header.stamp = rospy.Time.now() pose_msg.header.frame_id = "camera" CONVERT_SCALE_CM_TO_METERS = 100 pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS pose_msg.pose.orientation.x = transformed_ori[0] pose_msg.pose.orientation.y = transformed_ori[1] pose_msg.pose.orientation.z = transformed_ori[2] pose_msg.pose.orientation.w = transformed_ori[3] annotations.append({ 'location' : loc, 'quaternion_xyzw' : transformed_ori.tolist(), 'category_id' : category_names_to_id[m], 'id' : i_r }) # Publish self.pubs[m].publish(pose_msg) # self.pub_dimension[m].publish(str(dims)) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = self.class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = pose_msg.pose detection.results.append(hypothesis) detection.bbox.center = pose_msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) draw.draw_cube(points2d, self.draw_colors[m]) # Publish the image with results overlaid final_im = np.array(im)[..., ::-1] self.pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( final_im, "bgr8" ) ) cv2.imwrite(output_image_filepath, np.array(im)[..., ::-1]) self.pub_detections.publish(detection_array) self.publish_markers(detection_array) return annotations
def callback(self, imageMsg): image = self.bridge.imgmsg_to_cv2(imageMsg, "bgr8") image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image = image[:, :, ::-1].copy() # copy to draw on draw = image.copy() draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB) # Image formatting specific to Retinanet image = preprocess_image(image) image, scale = resize_image(image) # Run the inferencer try: with self.session.as_default(): with self.session.graph.as_default(): boxes, scores, labels = self.model.predict_on_batch(np.expand_dims(image, axis=0)) except Exception as e: rospy.logerr(e) rospy.logwarn("WARNING: Has your model been converted to an inference model yet? " "see https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model") return # correct for image scale boxes /= scale # Construct the detection message header = Header(frame_id=imageMsg.header.frame_id) detections_message_out = Detection2DArray() detections_message_out.header = header detections_message_out.detections = [] # visualize detections for box, score, label in zip(boxes[0], scores[0], labels[0]): # scores are sorted so we can break if score < self.confidence_cutoff: break # Add boxes and captions b = np.array(box).astype(int) cv2.rectangle(draw, (b[0], b[1]), (b[2], b[3]), self.color, self.thickness, cv2.LINE_AA) if (label > len(self.labels_to_names)): print("WARNING: Got unknown label, using 'detection' instead") caption = "Detection {:.3f}".format(score) else: caption = "{} {:.3f}".format(self.labels_to_names[label], score) cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) #Construct the output detection message bb = BoundingBox2D() det = Detection2D(header=header) hyp = ObjectHypothesisWithPose() center = Pose2D() hyp.id = label hyp.score = score bb.size_x = b[2] - b[0] bb.size_y = b[3] - b[1] center.x = float(b[2] + b[0])/2 center.y = float(b[3] + b[1])/2 bb.center = center det.results.append(hyp) det.bbox = bb detections_message_out.detections.append(det) self.detections_pub.publish(detections_message_out) # Write out image image_message_out = self.bridge.cv2_to_imgmsg(draw, encoding="rgb8") self.img_pub.publish(image_message_out)
def run_dope_node(params, freq=5): '''Starts ROS node to listen to image topic, run DOPE, and publish DOPE results''' global g_img global g_draw pubs = {} models = {} pnp_solvers = {} pub_dimension = {} draw_colors = {} class_ids = {} model_transforms = {} dimensions = {} mesh_scales = {} meshes = {} # Initialize parameters matrix_camera = np.zeros((3,3)) matrix_camera[0,0] = params["camera_settings"]['fx'] matrix_camera[1,1] = params["camera_settings"]['fy'] matrix_camera[0,2] = params["camera_settings"]['cx'] matrix_camera[1,2] = params["camera_settings"]['cy'] matrix_camera[2,2] = 1 dist_coeffs = np.zeros((4,1)) if "dist_coeffs" in params["camera_settings"]: dist_coeffs = np.array(params["camera_settings"]['dist_coeffs']) config_detect = lambda: None config_detect.mask_edges = 1 config_detect.mask_faces = 1 config_detect.vertex = 1 config_detect.threshold = 0.5 config_detect.softmax = 1000 config_detect.thresh_angle = params['thresh_angle'] config_detect.thresh_map = params['thresh_map'] config_detect.sigma = params['sigma'] config_detect.thresh_points = params["thresh_points"] # For each object to detect, load network model, create PNP solver, and start ROS publishers for model in params['weights']: models[model] =\ ModelData( model, g_path2package + "/weights/" + params['weights'][model] ) models[model].load_net_model() mesh_scales[model] = 1.0 model_transforms[model] = np.array([0.0, 0.0, 0.0, 1.0], dtype='float64') draw_colors[model] = \ tuple(params["draw_colors"][model]) class_ids[model] = \ (params["class_ids"][model]) dimensions[model] = tuple(params["dimensions"][model]) pnp_solvers[model] = \ CuboidPNPSolver( model, matrix_camera, Cuboid3d(params['dimensions'][model]), dist_coeffs=dist_coeffs ) pubs[model] = \ rospy.Publisher( '{}/pose_{}'.format(params['topic_publishing'], model), PoseStamped, queue_size=10 ) pub_dimension[model] = \ rospy.Publisher( '{}/dimension_{}'.format(params['topic_publishing'], model), String, queue_size=10 ) # Start ROS publisher pub_rgb_dope_points = \ rospy.Publisher( params['topic_publishing']+"/rgb_points", ImageSensor_msg, queue_size=10 ) pub_detections = \ rospy.Publisher( 'detected_objects', Detection3DArray, queue_size=10 ) pub_markers = \ rospy.Publisher( 'markers', MarkerArray, queue_size=10 ) # Starts ROS listener rospy.Subscriber( topic_cam, ImageSensor_msg, __image_callback ) # Initialize ROS node rospy.init_node('dope_vis', anonymous=True) rate = rospy.Rate(freq) print ("Running DOPE... (Listening to camera topic: '{}')".format(topic_cam)) print ("Ctrl-C to stop") while not rospy.is_shutdown(): if g_img is not None: # Copy and draw image img_copy = g_img.copy() im = Image.fromarray(img_copy) g_draw = ImageDraw.Draw(im) detection_array = Detection3DArray() detection_array.header = image_msg.header for m in models: # Detect object results = ObjectDetector.detect_object_in_image( models[m].net, pnp_solvers[m], g_img, config_detect ) # Publish pose and overlay cube on image for i_r, result in enumerate(results): if result["location"] is None: continue loc = result["location"] ori = result["quaternion"] # transform orientation transformed_ori = tf.transformations.quaternion_multiply(ori, model_transforms[m]) # rotate bbox dimensions if necessary # (this only works properly if model_transform is in 90 degree angles) dims = rotate_vector(vector=dimensions[m], quaternion=model_transforms[m]) dims = np.absolute(dims) dims = tuple(dims) msg = PoseStamped() msg.header.frame_id = params["frame_id"] msg.header.stamp = rospy.Time.now() CONVERT_SCALE_CM_TO_METERS = 100 msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS msg.pose.orientation.x = ori[0] msg.pose.orientation.y = ori[1] msg.pose.orientation.z = ori[2] msg.pose.orientation.w = ori[3] # Publish pubs[m].publish(msg) pub_dimension[m].publish(str(params['dimensions'][m])) # Add to Detection3DArray detection = Detection3D() hypothesis = ObjectHypothesisWithPose() hypothesis.id = class_ids[result["name"]] hypothesis.score = result["score"] hypothesis.pose.pose = msg.pose detection.results.append(hypothesis) detection.bbox.center = msg.pose detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS detection_array.detections.append(detection) # Draw the cube if None not in result['projected_points']: points2d = [] for pair in result['projected_points']: points2d.append(tuple(pair)) DrawCube(points2d, draw_colors[m]) # Publish the image with results overlaid pub_rgb_dope_points.publish( CvBridge().cv2_to_imgmsg( np.array(im)[...,::-1], "bgr8" ) ) # Delete all existing markers markers = MarkerArray() marker = Marker() marker.action = Marker.DELETEALL markers.markers.append(marker) pub_markers.publish(markers) # Object markers class_id_to_name = {class_id: name for name, class_id in class_ids.iteritems()} markers = MarkerArray() for i, det in enumerate(detection_array.detections): name = class_id_to_name[det.results[0].id] color = draw_colors[name] # cube marker marker = Marker() marker.header = detection_array.header marker.action = Marker.ADD marker.pose = det.bbox.center marker.color.r = color[0] / 255.0 marker.color.g = color[1] / 255.0 marker.color.b = color[2] / 255.0 marker.color.a = 0.4 marker.ns = "bboxes" marker.id = i marker.type = Marker.CUBE marker.scale = det.bbox.size markers.markers.append(marker) pub_markers.publish(markers) pub_detections.publish(detection_array) rate.sleep()
def object_predict(self, object_data, header, image_np, image): image_height, image_width, channels = image.shape obj = Detection2D() obj_hypothesis = ObjectHypothesisWithPose() object_id = object_data[0] object_score = object_data[1] dimensions = object_data[2] obj.header = header obj_hypothesis.id = object_id obj_hypothesis.score = object_score #obj.results.append(obj_hypothesis) obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height) obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width) obj.bbox.center.x = int( (dimensions[1] + dimensions[3]) * image_width / 2) obj.bbox.center.y = int( (dimensions[0] + dimensions[2]) * image_height / 2) ################################### pixelDiametro = obj.bbox.size_x # choose the bigest size if (obj.bbox.size_x > obj.bbox.size_y): pixelDiametro = obj.bbox.size_x else: pixelDiametro = obj.bbox.size_y #DIAMETER_LANDMARCK_M = 0.24 OR 0.5 metersDiametroLandmarck = self.DIAMETER_LANDMARCK_M #DISTANCE_FOCAL = 750 distFocus_real = self.DISTANCE_FOCAL altura = float( (metersDiametroLandmarck * distFocus_real) / pixelDiametro) # rospy.loginfo("--------------------------------") # rospy.loginfo("Diametro Marcador Real: %f", metersDiametroLandmarck) # # rospy.loginfo("Distancia Focal Real: %f", distFocus_real) # rospy.loginfo("Diametro (pixel): %f", pixelDiametro) # rospy.loginfo("Altura Drone (m): %f", altura) ################################### pixel_x = int((obj.bbox.center.x - (image_width / 2)) * (-1)) pixel_y = int((obj.bbox.center.y - (image_height / 2)) * (1)) k = float(metersDiametroLandmarck / pixelDiametro) obj_hypothesis.pose.pose.position.x = pixel_x * k obj_hypothesis.pose.pose.position.y = pixel_y * k obj_hypothesis.pose.pose.position.z = altura obj.results.append(obj_hypothesis) #rospy.loginfo("publish obj_hypothesis.score: %d", object_score) # rospy.loginfo("publish bbox.size x: %d", obj.bbox.size_x) # rospy.loginfo("publish bbox.size y: %d", obj.bbox.size_y) # rospy.loginfo("publish bbox.center x: %d", obj.bbox.center.x) # rospy.loginfo("publish bbox.center y: %d", obj.bbox.center.y) return obj
def callback_synchronize(self, msg_img, msg_bbox, cam_id): cv_img = self.cv_bridge.imgmsg_to_cv2(msg_img) new_bbox = Detection2DArray() print("aaaaa") for detection in msg_bbox.detections: box = Detection2D() box.bbox = detection.bbox pose = ObjectHypothesisWithPose() print(detection.results[0].id) if detection.results[0].id == 7: # id:7 Traffic light x_min = int(detection.bbox.center.x - (detection.bbox.size_x / 2)) y_min = int(detection.bbox.center.y - (detection.bbox.size_y / 2)) x_max = int(detection.bbox.center.x + (detection.bbox.size_x / 2)) y_max = int(detection.bbox.center.y + (detection.bbox.size_y / 2)) img_cropped = cv_img[y_min:y_max, x_min:x_max] preprocessed_img = self.preprocess_img(img=img_cropped) predict_f = self.model_traffic_light(preprocessed_img) predict = predict_f.data.max(1, keepdim=True)[1] pose.id = int(1000 + int(predict)) cnn_prob = float(torch.exp(predict_f)[0][int(predict)]) pose.score = cnn_prob box.results.append(pose) # pose.id = int(1000 + int(predict)) # box.results.append(pose) if self.validate_light_with_brightness_region( img_cropped, int(predict)): new_bbox.detections.append(box) elif detection.results[0].id == 8: # id:8 Traffic sign x_min = int(detection.bbox.center.x - (detection.bbox.size_x / 2)) y_min = int(detection.bbox.center.y - (detection.bbox.size_y / 2)) x_max = int(detection.bbox.center.x + (detection.bbox.size_x / 2)) y_max = int(detection.bbox.center.y + (detection.bbox.size_y / 2)) img_cropped = cv_img[y_min:y_max, x_min:x_max] preprocessed_img = self.preprocess_img(img=img_cropped) predict_f = self.model_traffic_sign(preprocessed_img) predict = predict_f.data.max(1, keepdim=True)[1] cnn_prob = float(torch.exp(predict_f)[0][int(predict)]) pose.score = cnn_prob pose.id = int(2000 + int(predict)) box.results.append(pose) new_bbox.detections.append(box) else: box = detection new_bbox.detections.append(box) new_bbox.header = msg_bbox.header if cam_id == "cam_fm_01": self.pub_fm01_new_bbox.publish(new_bbox) elif cam_id == "cam_fr_01": self.pub_fr01_new_bbox.publish(new_bbox) elif cam_id == "cam_fl_01": self.pub_fl01_new_bbox.publish(new_bbox)
def EfficientDetNode(): rospy.init_node('efficient_det_node', anonymous=True) rospy.Subscriber('input', String, image_callback, queue_size=1) pub = rospy.Publisher('/image_detections', Detection2DArray, queue_size=10) rate = rospy.Rate(1) # 10hz path_list = os.listdir(path) path_list.sort(key=lambda x: int(x.split('.')[0])) stamp_file = open(stamp_path) stamp_lines = stamp_file.readlines() stamp_i = 0 for filename in path_list: img_path = filename cur_frame = img_path[:-4] img_path = path + "/" + img_path cur_stamp = ((float)(stamp_lines[stamp_i][-13:].strip('\n'))) # cur_stamp = rospy.Time.from_sec( # ((float)(stamp_lines[stamp_i][-13:].strip('\n')))) stamp_i += 1 detection_results = Detection2DArray() # tf bilinear interpolation is different from any other's, just make do input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536] input_size = input_sizes[ compound_coef] if force_input_size is None else force_input_size ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size) if use_cuda: x = torch.stack( [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32 if not use_float16 else torch.float16).permute( 0, 3, 1, 2) model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list), ratios=anchor_ratios, scales=anchor_scales) model.load_state_dict( torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu')) model.requires_grad_(False) model.eval() if use_cuda: model = model.cuda() if use_float16: model = model.half() with torch.no_grad(): features, regression, classification, anchors = model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) out = invert_affine(framed_metas, out) display(cur_frame, out, ori_imgs, imshow=False, imwrite=True) for i in range(len(out)): for j in range(len(out[i]['rois'])): x1, y1, x2, y2 = out[i]['rois'][j].astype(np.int) obj = obj_list[out[i]['class_ids'][j]] score = float(out[i]['scores'][j]) result = ObjectHypothesisWithPose() result.score = score if (obj == 'car'): result.id = 0 if (obj == 'person'): result.id = 1 if (obj == 'cyclist'): result.id = 2 detection_msg = Detection2D() detection_msg.bbox.center.x = (x1 + x2) / 2 detection_msg.bbox.center.y = (y1 + y2) / 2 detection_msg.bbox.size_x = x2 - x1 detection_msg.bbox.size_y = y2 - y1 detection_msg.results.append(result) detection_results.detections.append(detection_msg) rospy.loginfo("%d: %lf", detection_msg.results[0].id, detection_msg.results[0].score) detection_results.header.seq = cur_frame #detection_results.header.stamp = cur_stamp rospy.loginfo(detection_results.header.stamp) pub.publish(detection_results) if not os.path.exists(txt_path): os.makedirs(txt_path) #with open(f'txt/{cur_frame}.txt', 'w') as f: with open(f'{txt_path}/{cur_frame}.txt', 'w') as f: #f.write(str((float)(stamp_lines[stamp_i][-13:].strip('\n'))) + "\n") f.write(str(cur_stamp) + "\n") for detection in detection_results.detections: f.write(str(detection.bbox.center.x) + " ") f.write(str(detection.bbox.center.y) + " ") f.write(str(detection.bbox.size_x) + " ") f.write(str(detection.bbox.size_y) + " ") f.write(str(detection.results[0].id) + " ") f.write(str(detection.results[0].score) + "\n") f.close() rate.sleep() print('running speed test...') with torch.no_grad(): print('test1: model inferring and postprocessing') print('inferring image for 10 times...') t1 = time.time() for _ in range(10): _, regression, classification, anchors = model(x) out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) out = invert_affine(framed_metas, out) t2 = time.time() tact_time = (t2 - t1) / 10 print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')