Code example #1
    def _construct_detection_msg_and_update_detection_image(
            self, detection_res, channel_id, stamp):
        if channel_id == Side.PORT:
            multiplier = -1
        else:
            multiplier = 1

        detection_array_msg = Detection2DArray()
        detection_array_msg.header.frame_id = self.pub_frame_id
        detection_array_msg.header.stamp = stamp

        for object_id, detection in detection_res.items():
            detection_msg = Detection2D()
            detection_msg.header = detection_array_msg.header

            object_hypothesis = ObjectHypothesisWithPose()
            object_hypothesis.id = object_id.value
            object_hypothesis.score = detection['confidence']
            object_hypothesis.pose.pose = self._detection_to_pose(
                detection['pos'], channel_id)

            # Filter out object detection outliers
            if abs(object_hypothesis.pose.pose.position.y) > self.water_depth:
                continue
            else:
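                # Mark the detection as a ~20 px wide colored stripe in the
                # one-row detection image (PORT positions are mirrored)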
                pos = self.channel_size + multiplier * detection['pos']
                start = max(pos - 10, 0)
                end = min(pos + 10, self.channel_size * 2)
                self.detection_image[0, start:end, :] = \
                    self.detection_colors[object_id]

            detection_msg.results.append(object_hypothesis)
            detection_array_msg.detections.append(detection_msg)
        return detection_array_msg
Code example #2
 def callback(self, ros_data):
     '''Callback function of subscribed topic. '''
     if ros_data.header.stamp < self.latest_finish_time:
         return
     #### direct conversion to CV2 ####
     now = rospy.Time.now()
     np_arr = np.frombuffer(ros_data.data, np.uint8)  # np.fromstring is deprecated
     image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
     # dim = (int(len(image_np / 20)), int(len(image_np[0]) / 20))
     # cv2.resize(image_np, dim)
     # image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) # OpenCV >= 3.0:
     results = detect(self.net, self.meta, image_np)
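     # Each darknet result unpacks as (class, score, (center_x, center_y,
     # width, height)), judging by how the fields are used below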
     detections = Detection2DArray()
     for result in results:
         detection = Detection2D()
         detection.header = ros_data.header
         res = ObjectHypothesisWithPose()
         res.id = self.get_id(result[0])
         res.score = result[1]
         detection.results.append(res)
         detection.bbox.size_x = result[2][2]
         detection.bbox.size_y = result[2][3]
         detection.bbox.center.x = result[2][0]
         detection.bbox.center.y = result[2][1]
         detections.detections.append(detection)
     self.res_pub.publish(detections)
     rospy.loginfo_throttle(
         1, 'Took yolo %s to process image' %
         ((rospy.Time.now() - now).to_sec()))
     self.latest_finish_time = rospy.Time.now()
Code example #3
    def listener_callback(self, data):
        self.get_logger().info("Received an image! ")
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return  # without a valid cv_image there is nothing to process

        image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
        self.timer.start()
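        # predict(image, top_k, prob_threshold): keep at most 10 boxes scoring
        # above 0.4, assuming the usual pytorch-ssd predictor signature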
        boxes, labels, probs = self.predictor.predict(image, 10, 0.4)
        interval = self.timer.end()
        print('Time: {:.2f}s, Detect Objects: {:d}.'.format(
            interval, labels.size(0)))

        detection_array = Detection2DArray()

        for i in range(boxes.size(0)):
            box = boxes[i, :]
            label = f"{self.class_names[labels[i]]}: {probs[i]:.2f}"
            print("Object: " + str(i) + " " + label)
            cv2.rectangle(cv_image, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 255, 0), 4)

            # Define the 2D detection message and add every detected object to it.
            object_hypothesis_with_pose = ObjectHypothesisWithPose()
            object_hypothesis_with_pose.id = str(self.class_names[labels[i]])
            object_hypothesis_with_pose.score = float(probs[i])

            bounding_box = BoundingBox2D()
            bounding_box.center.x = float((box[0] + box[2]) / 2)
            bounding_box.center.y = float((box[1] + box[3]) / 2)
            bounding_box.center.theta = 0.0

            bounding_box.size_x = float(2 * (bounding_box.center.x - box[0]))
            bounding_box.size_y = float(2 * (bounding_box.center.y - box[1]))

            detection = Detection2D()
            detection.header = data.header
            detection.results.append(object_hypothesis_with_pose)
            detection.bbox = bounding_box

            detection_array.header = data.header
            detection_array.detections.append(detection)

            cv2.putText(
                cv_image,
                label,
                (int(box[0]) + 20, int(box[1]) + 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,  # font scale
                (255, 0, 255),
                2)  # line type
        # Displaying the predictions
        cv2.imshow('object_detection', cv_image)
        # Publishing the results in the Detection2DArray vision_msgs format
        self.detection_publisher.publish(detection_array)
        ros_image = self.bridge.cv2_to_imgmsg(cv_image)
        ros_image.header.frame_id = 'camera_frame'
        self.result_publisher.publish(ros_image)
        cv2.waitKey(1)
Code example #4
    def object_predict(self, object_data, header, image_np, image, offset_x,
                       offset_y):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = object_data[0]
        object_score = object_data[1]
        dimensions = object_data[2]

        # TODO - Need to make it so that the size and offset of the bounding box is corrected to the original
        # 320 x 240 image. Center x and center y need to be tested, while size x and size y need to be fixed
        obj.header = header
        obj_hypothesis.id = object_id
        obj_hypothesis.score = object_score
        obj.results.append(obj_hypothesis)
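        # dimensions appear to be normalized [ymin, xmin, ymax, xmax], so x
        # spans scale by image_width and y spans by image_height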
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        obj.bbox.center.x = int(
            (dimensions[1] + dimensions[3]) * image_width /
            2) + offset_x  #do visualize this later
        obj.bbox.center.y = int((dimensions[0] + dimensions[2]) * image_height /
                                2) + offset_y  #do visualize this later

        return obj
Code example #5
    def object_predict(self, object_data, header, image_np, image):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = object_data[0]
        object_score = object_data[1]
        dimensions = object_data[2]

        obj.header = header
        obj_hypothesis.id = object_id
        obj_hypothesis.score = object_score
        obj.results.append(obj_hypothesis)
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        # x scales by image_width and y by image_height for the
        # [ymin, xmin, ymax, xmax] box layout
        obj.bbox.center.x = int(
            (dimensions[1] + dimensions[3]) * image_width / 2)
        obj.bbox.center.y = int(
            (dimensions[0] + dimensions[2]) * image_height / 2)

        # rospy.loginfo("publish bbox.size x: %d", obj.bbox.size_x)
        # rospy.loginfo("publish bbox.size y: %d", obj.bbox.size_y)
        # rospy.loginfo("publish bbox.center x: %d", obj.bbox.center.x)
        # rospy.loginfo("publish bbox.center y: %d", obj.bbox.center.y)

        return obj
Code example #6
    def object_predict(self, object_data, header, image_np, image):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = object_data[0]
        object_score = object_data[1]
        dimensions = object_data[2]

        obj.header = header
        obj_hypothesis.id = object_id
        obj_hypothesis.score = object_score
        obj.results.append(obj_hypothesis)
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        obj.bbox.center.x = int(
            (dimensions[1] + dimensions[3]) * image_width / 2)
        obj.bbox.center.y = int(
            (dimensions[0] + dimensions[2]) * image_height / 2)

        #print(str(obj.bbox))
        #        print('height:'+str(image_height))
        #        print('width: '+str(image_width))

        return obj
Code example #7
File: yolo.py Project: Thomasklein-git/Yolo
    def callback(self):
        image = rospy.wait_for_message(
            "/zed2/zed_node/left/image_rect_color/compressed", CompressedImage)
        time1 = rospy.Time.now().to_sec()
        self.timer.header = image.header
        self.timer.header.frame_id = "zed2_left_camera_frame"
        self.timer.time_ref = rospy.Time.now()
        self.timer_pub.publish(self.timer)
        cv_image = self.bridge.compressed_imgmsg_to_cv2(image, "bgr8")
        _, bboxes = detect_image(self.yolo,
                                 cv_image,
                                 "",
                                 input_size=YOLO_INPUT_SIZE,
                                 show=False,
                                 CLASSES=TRAIN_CLASSES,
                                 score_threshold=0.3,
                                 iou_threshold=0.1,
                                 rectangle_colors=(255, 0, 0))
        detect = Detection2DArray()
        detect.header = image.header

        for Object in bboxes:
            detection = Detection2D()
            hypo = ObjectHypothesisWithPose()
            #Start x
            x1 = Object[0]
            #End x
            x2 = Object[2]
            #Start y
            y1 = Object[1]
            #end y
            y2 = Object[3]

            #Size x
            Sx = x2 - x1
            #Size y
            Sy = y2 - y1
            #Center x
            Cx = x1 + Sx / 2
            #Center y
            Cy = y1 + Sy / 2

            detection.bbox.center.x = Cx
            detection.bbox.center.y = Cy
            detection.bbox.size_x = Sx
            detection.bbox.size_y = Sy

            hypo.id = int(Object[5])
            hypo.score = Object[4]

            detection.results = [hypo]
            detection.is_tracking = False
            detect.detections.append(detection)
        self.boxes_pub.publish(detect)

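        # NOTE: re-invoking the callback keeps the capture loop alive but adds
        # a stack frame per image; a while-not-shutdown loop avoids hitting
        # Python's recursion limit.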
        self.callback()
Code example #8
    def detect(self, image):

        # Convert to grayscale if needed
        if image.ndim == 3:
            image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

        image_height, image_width = image.shape
        image_area = image_height * image_width

        # Apply (inverse) binary threshold to input image
        mask = cv.threshold(image, THRESHOLD, THRESHOLD_MAX,
                            cv.THRESH_BINARY_INV)[1]

        # Dilate mask to find more reliable contours
        # kernel = np.ones((5, 5), np.uint8)
        # mask_dilated = cv.dilate(mask, kernel, iterations=1)

        # Find external approximate contours in dilated mask
        contours, hierarchy = cv.findContours(mask, cv.RETR_EXTERNAL,
                                              cv.CHAIN_APPROX_SIMPLE)

        # Filter out contours that don't qualify as a detection
        detections = []
        for contour in contours:
            # Filter out if the contour touches the image border
            x, y, w, h = cv.boundingRect(contour)
            if x == 0 or y == 0 or x + w == image_width or y + h == image_height:
                continue
            # Filter out if the contour is too small
            if cv.contourArea(contour) < 1e-4 * image_area:
                continue
            detections.append((x, y, w, h))

        # Fill detections msg
        detection_array_msg = Detection2DArray()
        for detection in detections:
            x, y, w, h = detection

            center_x = x + w / 2.
            center_y = y + h / 2.
            bbox = BoundingBox2D()
            bbox.center = Pose2D(x=center_x, y=center_y, theta=0)
            bbox.size_x = w
            bbox.size_y = h

            object_hypothesis = ObjectHypothesisWithPose()
            object_hypothesis.id = 0
            object_hypothesis.score = 1.0

            detection_msg = Detection2D()
            detection_msg.bbox = bbox
            detection_msg.results.append(object_hypothesis)

            detection_array_msg.detections.append(detection_msg)

        return detection_array_msg
Code example #9
File: yolo_server.py Project: 459below/yolov2_ros
    def _handle_yolo_detect(self, req):
        cv_image = None
        detection_array = Detection2DArray()
        detections = []
        boxes = None
        
        try:
            cv_image = self.bridge.imgmsg_to_cv2(req.image, "bgr8")
        except CvBridgeError as e:
            rospy.logerr(e)
        try:
            boxes = self.yolo.predict(cv_image)
        except SystemError:
            pass
        # rospy.loginfo('Found {} boxes'.format(len(boxes)))
        # boxes stays None if prediction failed above
        for box in (boxes if boxes is not None else []):
            detection = Detection2D()
            results = []
            bbox = BoundingBox2D()
            center = Pose2D()

            detection.header = Header()
            detection.header.stamp = rospy.get_rostime()
            # detection.source_img = deepcopy(req.image)

            labels = box.get_all_labels()
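            # get_all_labels() presumably returns one confidence per class
            # index, so the index doubles as the hypothesis id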
            for i in range(len(labels)):
                object_hypothesis = ObjectHypothesisWithPose()
                object_hypothesis.id = i
                object_hypothesis.score = labels[i]
                results.append(object_hypothesis)
            
            detection.results = results

            x, y = box.get_xy_center()
            center.x = x
            center.y = y
            center.theta = 0.0
            bbox.center = center

            size_x, size_y = box.get_xy_extents()
            bbox.size_x = size_x
            bbox.size_y = size_y

            detection.bbox = bbox

            detections.append(detection)

        detection_array.header = Header()
        detection_array.header.stamp = rospy.get_rostime()
        detection_array.detections = detections

        return YoloDetectResponse(detection_array)
Code example #10
    def _publish_marker_detection(self, marker, cos_sim):
        """Publish detected marker"""
        distance = self._get_distance_to_marker(marker)

        object_hypothesis = ObjectHypothesisWithPose()
        object_hypothesis.id = 1
        # the higher the cos_sim (further away from 90 degree angle between current_pose
        # and the marker), the lower the score
        object_hypothesis.score = (-cos_sim + (self.buoy_radius * 2)) / (
            self.buoy_radius * 2)

        marker_transformed = self._transform_pose(
            marker, from_frame=marker.header.frame_id)
        object_hypothesis.pose.pose = marker_transformed.pose
        # Add random Gaussian noise to the pose of the object
        def noise():
            return np.random.randn() * self.noise_sigma

        object_hypothesis.pose.pose.position.x += noise()
        object_hypothesis.pose.pose.position.y += noise()
        object_hypothesis.pose.pose.position.z += noise()
        object_hypothesis.pose.pose.orientation.x += noise()
        object_hypothesis.pose.pose.orientation.y += noise()
        object_hypothesis.pose.pose.orientation.z += noise()
        object_hypothesis.pose.pose.orientation.w += noise()
        # The perturbed quaternion is no longer unit length; consumers may
        # want to renormalize it

        # Wrap ObjectHypothesisWithPose msg into Detection2D msg
        detection_msg = Detection2D()
        detection_msg.header.frame_id = self.published_frame_id
        detection_msg.header.stamp = rospy.Time.now()
        detection_msg.results.append(object_hypothesis)

        # Wrap Detection2D msg into Detection2DArray msg
        detection_array_msg = Detection2DArray()
        detection_array_msg.header = detection_msg.header
        detection_array_msg.detections.append(detection_msg)
        self.pub.publish(detection_array_msg)

        # Publish detection as a Marker for visualization in rviz
        detected_marker = copy.deepcopy(marker)
        detected_marker.header.stamp = rospy.Time.now()
        detected_marker.ns = 'detected_{}'.format(detected_marker.ns)
        detected_marker.color = ColorRGBA(0, 1, 0, 1)
        detected_marker.lifetime.secs = 1
        self.pub_detected_markers.publish(detected_marker)
Code example #11
File: yolo_c.py Project: Thomasklein-git/Yolo
    def callback_yolo(self, image):
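        # The incoming message is ignored; the most recently cached image
        # (self.image) is used instead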
        image = self.image
        cv_image = self.bridge.compressed_imgmsg_to_cv2(image, "bgr8")

        _, bboxes = detect_image(self.yolo,
                                 cv_image,
                                 "",
                                 input_size=YOLO_INPUT_SIZE,
                                 show=False,
                                 rectangle_colors=(255, 0, 0))
        detect = Detection2DArray()
        detect.header = image.header

        for Object in bboxes:
            detection = Detection2D()
            hypo = ObjectHypothesisWithPose()
            #Start x
            x1 = Object[0]
            #End x
            x2 = Object[2]
            #Start y
            y1 = Object[1]
            #end y
            y2 = Object[3]

            #Size x
            Sx = x2 - x1
            #Size y
            Sy = y2 - y1
            #Center x
            Cx = x1 + Sx / 2
            #Center y
            Cy = y1 + Sy / 2

            detection.bbox.center.x = Cx
            detection.bbox.center.y = Cy
            detection.bbox.size_x = Sx
            detection.bbox.size_y = Sy

            hypo.id = int(Object[5])
            hypo.score = Object[4]

            detection.results = [hypo]
            detect.detections.append(detection)

        self.bbox_pub.publish(detect)
Code example #12
    def create_detections_msg(self, image_np, output_dict):
        img_height = image_np.shape[0]
        img_width = image_np.shape[1]

        boxes = output_dict['detection_boxes']
        classes = output_dict['detection_classes']
        scores = output_dict['detection_scores']

        detections = Detection2DArray()

        detections.header.stamp = self.get_clock().now().to_msg()
        detections.detections = []
        for i in range(len(boxes)):
            det = Detection2D()
            det.header = detections.header
            det.results = []
            detected_object = ObjectHypothesisWithPose()
            detected_object.id = classes[i].item()
            detected_object.score = scores[i].item()
            det.results.append(detected_object)

            # box is ymin, xmin, ymax, xmax in normalized coordinates
            box = boxes[i]
            det.bbox.size_y = (box[2] - box[0]) * img_height
            det.bbox.size_x = (box[3] - box[1]) * img_width
            det.bbox.center.x = (box[1] + box[3]) * img_width / 2
            det.bbox.center.y = (box[0] + box[2]) * img_height / 2

            if (self.republish_image):
                box_img = image_np[int(box[0] * img_height):int(box[2] *
                                                                img_height),
                                   int(box[1] * img_width):int(box[3] *
                                                               img_width)]

                det.source_img = img_utils.image_np_to_image_msg(box_img)

            detections.detections.append(det)

        return detections
Code example #13
    def detect_objects(self, data):
        """
        This function detects and classifies the objects in the image provided through a Service Request by running on her the provided
        detection model. Returns a vision_msgs/Detection2DArray, for which each detection is populated only with the bbox and results fields. Moreover,
        for what it concerns the results field, each result is populated only with the id and score fields.
        All the other fields are not significant for this application, so they have been ignored.

        Args:
            data (sensor_msgs/Image): image to perform object detection on.

        Returns:
            pepper_object_detectionResponse: response encapsulating data regarding detected objects, structured as in service definition.
        """

        # Convert image from sensor_msgs/Image to numpy array
        img_np = ros_numpy.numpify(data.img)
        rospy.loginfo('Object detection server computing predictions...')
        detections = self._detection_model(img_np)
        rospy.loginfo('Predictions computed!')
        message = Detection2DArray()
        # Insert all the predictions into the message and return them
        for class_id, score, box in zip(detections['detection_classes'],
                                        detections['detection_scores'],
                                        detections['detection_boxes']):
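            # detection_boxes appear to be [ymin, xmin, ymax, xmax]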
            detection = Detection2D()
            detection.bbox.size_x = box[3] - box[1]
            detection.bbox.size_y = box[2] - box[0]
            detection.bbox.center.x = box[1] + detection.bbox.size_x / 2
            detection.bbox.center.y = box[0] + detection.bbox.size_y / 2
            detected_object = ObjectHypothesisWithPose()
            detected_object.score = score
            detected_object.id = class_id
            detection.results.append(detected_object)
            message.detections.append(detection)
        # Create a response object
        response = pepper_object_detectionResponse()
        response.detections = message
        return response
Code example #14
    def publish_markers(self, fid_data_array):
        fidarray = FiducialTransformArray()
        fidarray.header.stamp = rospy.Time.now()
        vis = Detection2DArray()
        vis.header.stamp = rospy.Time.now()

        for fid in fid_data_array:
            if VIS_MSGS:
                obj = Detection2D()
                oh = ObjectHypothesisWithPose()
                oh.id = fid.id
                oh.pose.pose.position.x = fid.translation.x
                oh.pose.pose.position.y = fid.translation.y
                oh.pose.pose.position.z = fid.translation.z
                oh.pose.pose.orientation.w = fid.rotation.w
                oh.pose.pose.orientation.x = fid.rotation.x
                oh.pose.pose.orientation.y = fid.rotation.y
                oh.pose.pose.orientation.z = fid.rotation.z
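                # Map the fixed object error onto a (0, 1] confidence score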
                oh.score = math.exp(-2 * OBJECT_ERROR)

                obj.results.append(oh)
                vis.detections.append(obj)
            else:
                data = FiducialTransform()
                data.fiducial_id = fid.id
                data.transform.translation = fid.translation
                data.transform.rotation = fid.rotation
                data.image_error = IMAGE_ERROR
                data.object_error = OBJECT_ERROR
                data.fiducial_area = FIDUCIAL_AREA

                fidarray.transforms.append(data)

        if VIS_MSGS:
            self.fid_pub.publish(vis)
        else:
            self.fid_pub.publish(fidarray)
Code example #15
    def object_predict(self, object_data, header, image_np, image, ID):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = ID
        object_score = object_data[1]
        dimensions = object_data[2]

        obj.header = header
        obj_hypothesis.id = object_id
        obj_hypothesis.score = object_score
        obj.results.append(obj_hypothesis)
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        #the center point of x coordinate is determined here...
        obj.bbox.center.x = int(
            (dimensions[1] + dimensions[3]) * image_width / 2)
        #the center point of y coordinate is determined here...
        obj.bbox.center.y = int(
            (dimensions[0] + dimensions[2]) * image_height / 2)

        #detection results are returned as type Detection2D to objArray...
        return obj
Code example #16
    def gather_pose_results(self, header, class_id, translation, rotation, score):
        
        pose_msg = PoseStamped()
        pose_msg.header = header
        pose_msg.pose.position.x = translation[0] 
        pose_msg.pose.position.y = translation[1] 
        pose_msg.pose.position.z = translation[2] 
        pose_msg.pose.orientation.x = rotation[0]
        pose_msg.pose.orientation.y = rotation[1]
        pose_msg.pose.orientation.z = rotation[2]
        pose_msg.pose.orientation.w = rotation[3]

        # Build the Detection3D entry
        detections = Detection3D()
        hypothesis = ObjectHypothesisWithPose()
        hypothesis.id = class_id
        hypothesis.pose.pose = pose_msg.pose
        hypothesis.score = score
        detections.results.append(hypothesis)
        detections.bbox.center = pose_msg.pose
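        # self.dims appears to store per-class half-extents, hence the
        # factor of two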
        detections.bbox.size.x = self.dims[class_id][0] * 2
        detections.bbox.size.y = self.dims[class_id][1] * 2
        detections.bbox.size.z = self.dims[class_id][2] * 2
        return pose_msg, detections
Code example #17
    def detect_multi(self, images):

        image_shape = images[0].shape

        images_list = []
        for image in images:
            image = cv.resize(image,
                              self.image_size[::-1])  # cv uses width x height!
            image = image[..., np.newaxis]
            if not self.grayscale and image.shape[-1] == 1:
                image = cv.cvtColor(image, cv.COLOR_GRAY2RGB)
            image_torch = self.transform(image)[np.newaxis, ...]
            images_list.append(image_torch)

        images_torch = torch.cat(images_list, 0)

        if self.half:
            images_torch = images_torch.half()

        if self.has_gpu and self.use_gpu:
            images_torch = images_torch.cuda()

        with torch.no_grad():
            # Raw detection is a (N x 8190 x num_classes) tensor
            start_time_inf = self.time_synchronized()
            raw_detections, _ = self.model(images_torch)
            elapsed_time_inf = self.time_synchronized() - start_time_inf
            if self.verbose:
                print('time (inf): {:.4f}s'.format(elapsed_time_inf))

        if self.half:
            raw_detections = raw_detections.float()

        start_time_nms = self.time_synchronized()
        detections_torch = self.non_max_suppression(
            raw_detections.float(),
            conf_thres=self.confidence_threshold,
            iou_thres=self.iou_threshold)
        elapsed_time_nms = self.time_synchronized() - start_time_nms
        if self.verbose:
            print('time (nms): {:.4f}s'.format(elapsed_time_nms))

        if self.verbose:
            print('time (tot): {:.4f}s\n'.format(elapsed_time_inf +
                                                 elapsed_time_nms))

        detection_array_list = []
        for detection_torch in detections_torch:

            detection_array_msg = Detection2DArray()

            if detection_torch is not None:

                # Rescale bounding boxes back to original (old) image size
                # shape_old: Shape of images as they come in
                # shape_new: Shape of images used for inference
                detection_torch = rescale_boxes(detection_torch,
                                                shape_old=image_shape[:2],
                                                shape_new=self.image_size)
                detections = detection_torch.cpu().numpy()

                for i, detection in enumerate(detections):
                    x1, y1, x2, y2, object_conf, class_pred = detection

                    class_id = int(class_pred)
                    # class_name = self.classes[class_id]

                    size_x = (x2 - x1)
                    size_y = (y2 - y1)
                    x = x1 + (size_x / 2.0)
                    y = y1 + (size_y / 2.0)
                    bbox = BoundingBox2D()
                    bbox.center = Pose2D(x=x, y=y, theta=0)
                    bbox.size_x = size_x
                    bbox.size_y = size_y

                    object_hypothesis = ObjectHypothesisWithPose()
                    object_hypothesis.id = class_id
                    object_hypothesis.score = object_conf

                    detection_msg = Detection2D()
                    detection_msg.bbox = bbox
                    detection_msg.results.append(object_hypothesis)

                    detection_array_msg.detections.append(detection_msg)

            detection_array_list.append(detection_array_msg)

        return detection_array_list
Code example #18
File: ros_detect.py Project: Shanki5/WSR_ROS
def detect(img, camera_frame_id, sensor_image):

    time1 = time.time()

    global ros_image
    cudnn.benchmark = True
    dataset = loadimg(img)
    # print(dataset[3])
    #plt.imshow(dataset[2][:, :, ::-1])
    names = model.module.names if hasattr(model, 'module') else model.names
    #colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
    #colors=[[0,255,0]]
    augment = True
    conf_thres = 0.3
    iou_thres = 0.45
    classes = (0, 1, 2, 3, 5, 7)
    agnostic_nms = True
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    path = dataset[0]
    img = dataset[1]
    im0s = dataset[2]
    vid_cap = dataset[3]
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    time2 = time.time()
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    # Inference
    pred = model(img, augment=augment)[0]
    # Apply NMS
    pred = non_max_suppression(pred,
                               conf_thres,
                               iou_thres,
                               classes=classes,
                               agnostic=agnostic_nms)

    view_img = True
    save_txt = True
    save_conf = True
    time3 = time.time()

    waste_detections = Detection2DArray()
    waste_detections.header = Header()
    waste_detections.header.stamp = rospy.get_rostime()
    waste_detections.header.frame_id = camera_frame_id

    for i, det in enumerate(pred):  # detections per image
        det_obj = Detection2D()
        det_obj.header = waste_detections.header
        # det_obj.source_img = sensor_image
        result = ObjectHypothesisWithPose()

        p, s, im0 = path, '', im0s
        s += '%gx%g ' % img.shape[2:]  # print string
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        if det is not None:
            #print(det)
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()
            # Print results
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s += '%g %ss, ' % (n, names[int(c)])  # add to string
                # Write results
            for *xyxy, conf, cls in reversed(det):
                if save_txt:  # Write to file
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    line = (cls, conf,
                            *xywh) if save_conf else (cls,
                                                      *xywh)  # label format
                    # xyxy2xywh already yields center-based (x, y, w, h), so
                    # the center coordinates can be used directly
                    det_obj.bbox.center.x = xywh[0]
                    det_obj.bbox.center.y = xywh[1]
                    det_obj.bbox.size_x = xywh[2]
                    det_obj.bbox.size_y = xywh[3]
                    result.id = int(cls)
                    result.score = conf

                if view_img:  # Add bbox to image
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=[0, 255, 0],
                                 line_thickness=3)
                det_obj.results.append(result)
            waste_detections.detections.append(det_obj)
    time4 = time.time()
    print('************')
    print('2-1', time2 - time1)
    print('3-2', time3 - time2)
    print('4-3', time4 - time3)
    print('total', time4 - time1)
    out_img = im0[:, :, [2, 1, 0]]
    ros_image = out_img
    cv2.imshow('YOLOV5', out_img)
    a = cv2.waitKey(1)
    #### Publish detections and annotated image ####
    if len(waste_detections.detections) != 0:
        det_pub.publish(waste_detections)
    publish_image(im0)
Code example #19
File: dope_image.py Project: SBPL-Cruz/perception
    def run_on_image(self, image_filename, category_names_to_id, output_image_filepath):
        
        camera_info = self.camera_info

        if not self.info_manager.isCalibrated():
            rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!')
        img = cv2.imread(image_filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Update camera matrix and distortion coefficients

        # camera_matrix = np.matrix(camera_info.K, dtype='float64')
        # camera_matrix.resize((3, 3))
        # camera_matrix = np.array([[768.16058349609375, 0, 480], 
        #              [0, 768.16058349609375, 270], 
        #              [0, 0, 1]])
        # dist_coeffs = np.matrix(camera_info.D, dtype='float64')
        # dist_coeffs.resize((len(camera_info.D), 1))

        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        # detection_array.header = image_msg.header
        detection_array.header = "camera"
        annotations = []
        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )
            print("results : {}".format(results))

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                loc = result["location"]
                ori = result["quaternion"]

                # transform orientation
                # transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m])
                transformed_ori = quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                # pose_msg.header = image_msg.header
                pose_msg.header.stamp = rospy.Time.now()
                pose_msg.header.frame_id = "camera"
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]

                annotations.append({
                                'location' : loc,
                                'quaternion_xyzw' : transformed_ori.tolist(),
                                'category_id' : category_names_to_id[m],
                                'id' : i_r
                            })
                # Publish
                self.pubs[m].publish(pose_msg)
                # self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points']:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        final_im = np.array(im)[..., ::-1]
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                final_im,
                "bgr8"
            )
        )
        cv2.imwrite(output_image_filepath, final_im)
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)
        return annotations
Code example #20
File: dope_image.py Project: SBPL-Cruz/perception
    def run_on_image_icp(self, 
            image_filename, category_names_to_id, cloud_in, output_image_filepath, publish_cloud=True):

        """Image callback"""
        
        camera_info = self.camera_info

        if not self.info_manager.isCalibrated():
            rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!')
        img = cv2.imread(image_filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Update camera matrix and distortion coefficients
        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        detection_array.header = "camera"
        annotations = []

        start_time = time.time()

        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                print(result)
                loc = result["location"]
                ori = result["quaternion"]
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_frame = "camera"

                rospy.logwarn("Doing ICP for result : {}, {}".format(i_r, result["name"]))
                loc_scale = np.array([loc[0] / 100, loc[1] / 100, loc[2] / 100])
                R = tf.transformations.quaternion_matrix(ori)
                T = tf.transformations.translation_matrix(loc_scale)

                total_transform = tf.transformations.concatenate_matrices(T, R)
                cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform)
                cloud_color = np.zeros(cloud_filtered_array.shape[0])
                ros_msg = self.xyzrgb_array_to_pointcloud2(
                    cloud_filtered_array, cloud_color, rospy.Time.now(), "camera"
                )
                # self.pub_pose_cloud.publish(ros_msg)
                
                # rospy.logwarn("Num points after downsample and filter : {}".format(cloud_filtered_array.shape[0]))
                
                cloud_pose = pcl.PointCloud()
                cloud_pose.from_array(cloud_filtered_array)

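                # Refine the DOPE pose by aligning the transformed model cloud
                # to the observed cloud with Generalized ICP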
                icp = cloud_pose.make_GeneralizedIterativeClosestPoint()
                converged, transf, estimate, fitness = icp.gicp(cloud_pose, cloud_in, max_iter=25)
                print('has converged:' + str(converged) + ' score: ' + str(fitness))
                print(str(transf))
                total_transform_icp = tf.transformations.concatenate_matrices(transf, total_transform)
                print(str(total_transform_icp))

                loc = tf.transformations.translation_from_matrix(total_transform_icp) * 100
                ori = tf.transformations.quaternion_from_matrix(total_transform_icp)
                pose_frame = "camera"

                if publish_cloud :
                    cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform_icp)
                    cloud_color = np.zeros(cloud_filtered_array.shape[0])
                    ros_msg = self.xyzrgb_array_to_pointcloud2(
                        cloud_filtered_array, cloud_color, rospy.Time.now(), "camera"
                    )
                    self.pub_pose_cloud.publish(ros_msg)

                # transform orientation
                transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                # pose_msg.header = image_msg.header
                pose_msg.header.stamp = rospy.Time.now()
                pose_msg.header.frame_id = pose_frame
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]


                annotations.append({
                                'location' : loc,
                                'quaternion_xyzw' : transformed_ori.tolist(),
                                'category_id' : category_names_to_id[m],
                                'id' : i_r
                            })
                # Publish
                self.pubs[m].publish(pose_msg)
                self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points'] and publish_cloud:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        runtime = (time.time() - start_time)
        final_im = np.array(im)[..., ::-1]
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                final_im,
                "bgr8"
            )
        )
        cv2.imwrite(output_image_filepath, final_im)
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)

        return annotations, runtime
Code example #21
def run_dope_node(params, freq=5):
    '''Starts ROS node to listen to image topic, run DOPE, and publish DOPE results'''

    global g_img
    global g_draw

    pubs = {}
    models = {}
    pnp_solvers = {}
    pub_dimension = {}
    draw_colors = {}
    class_ids = {}
    model_transforms = {}
    dimensions = {}
    mesh_scales = {}
    meshes = {}

    # rospy requires init_node before any publisher or subscriber is created
    rospy.init_node('dope_vis', anonymous=True)
    rate = rospy.Rate(freq)

    # Initialize parameters
    matrix_camera = np.zeros((3,3))
    matrix_camera[0,0] = params["camera_settings"]['fx']
    matrix_camera[1,1] = params["camera_settings"]['fy']
    matrix_camera[0,2] = params["camera_settings"]['cx']
    matrix_camera[1,2] = params["camera_settings"]['cy']
    matrix_camera[2,2] = 1
    dist_coeffs = np.zeros((4,1))

    if "dist_coeffs" in params["camera_settings"]:
        dist_coeffs = np.array(params["camera_settings"]['dist_coeffs'])
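    # A bare lambda serves as a cheap mutable namespace for detector settings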
    config_detect = lambda: None
    config_detect.mask_edges = 1
    config_detect.mask_faces = 1
    config_detect.vertex = 1
    config_detect.threshold = 0.5
    config_detect.softmax = 1000
    config_detect.thresh_angle = params['thresh_angle']
    config_detect.thresh_map = params['thresh_map']
    config_detect.sigma = params['sigma']
    config_detect.thresh_points = params["thresh_points"]

    # For each object to detect, load network model, create PNP solver, and start ROS publishers
    for model in params['weights']:
        models[model] =\
            ModelData(
                model, 
                g_path2package + "/weights/" + params['weights'][model]
            )
        models[model].load_net_model()
        mesh_scales[model] = 1.0
        model_transforms[model] = np.array([0.0, 0.0, 0.0, 1.0], dtype='float64')
        draw_colors[model] = \
            tuple(params["draw_colors"][model])
        class_ids[model] = \
            (params["class_ids"][model])
        dimensions[model] = tuple(params["dimensions"][model])
        pnp_solvers[model] = \
            CuboidPNPSolver(
                model,
                matrix_camera,
                Cuboid3d(params['dimensions'][model]),
                dist_coeffs=dist_coeffs
            )
        pubs[model] = \
            rospy.Publisher(
                '{}/pose_{}'.format(params['topic_publishing'], model), 
                PoseStamped, 
                queue_size=10
            )
        pub_dimension[model] = \
            rospy.Publisher(
                '{}/dimension_{}'.format(params['topic_publishing'], model),
                String, 
                queue_size=10
            )

    # Start ROS publisher
    pub_rgb_dope_points = \
        rospy.Publisher(
            params['topic_publishing']+"/rgb_points", 
            ImageSensor_msg, 
            queue_size=10
        )
    pub_detections = \
        rospy.Publisher(
            'detected_objects',
            Detection3DArray,
            queue_size=10
        )
    pub_markers = \
            rospy.Publisher(
                'markers',
                MarkerArray,
                queue_size=10
            )
    
    # Starts ROS listener
    rospy.Subscriber(
        topic_cam, 
        ImageSensor_msg, 
        __image_callback
    )

    print ("Running DOPE...  (Listening to camera topic: '{}')".format(topic_cam)) 
    print ("Ctrl-C to stop")

    while not rospy.is_shutdown():
        if g_img is not None:
            # Copy and draw image
            img_copy = g_img.copy()
            im = Image.fromarray(img_copy)
            g_draw = ImageDraw.Draw(im)

            detection_array = Detection3DArray()
            # image_msg is not in scope here; build the header directly
            detection_array.header.frame_id = params["frame_id"]
            detection_array.header.stamp = rospy.Time.now()


            for m in models:
                # Detect object
                results = ObjectDetector.detect_object_in_image(
                            models[m].net, 
                            pnp_solvers[m],
                            g_img,
                            config_detect
                            )
                
                # Publish pose and overlay cube on image
                for i_r, result in enumerate(results):
                    if result["location"] is None:
                        continue
                    loc = result["location"]
                    ori = result["quaternion"]  

                    # transform orientation
                    transformed_ori = tf.transformations.quaternion_multiply(ori, model_transforms[m])

                    # rotate bbox dimensions if necessary
                    # (this only works properly if model_transform is in 90 degree angles)
                    dims = rotate_vector(vector=dimensions[m], quaternion=model_transforms[m])
                    dims = np.absolute(dims)
                    dims = tuple(dims)
                                    
                    msg = PoseStamped()
                    msg.header.frame_id = params["frame_id"]
                    msg.header.stamp = rospy.Time.now()
                    CONVERT_SCALE_CM_TO_METERS = 100
                    msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.orientation.x = ori[0]
                    msg.pose.orientation.y = ori[1]
                    msg.pose.orientation.z = ori[2]
                    msg.pose.orientation.w = ori[3]

                    # Publish
                    pubs[m].publish(msg)
                    pub_dimension[m].publish(str(params['dimensions'][m]))

                    # Add to Detection3DArray
                    detection = Detection3D()
                    hypothesis = ObjectHypothesisWithPose()
                    hypothesis.id = class_ids[result["name"]]
                    hypothesis.score = result["score"]
                    hypothesis.pose.pose = msg.pose
                    detection.results.append(hypothesis)
                    detection.bbox.center = msg.pose
                    detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                    detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                    detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                    detection_array.detections.append(detection)

                    # Draw the cube
                    if None not in result['projected_points']:
                        points2d = []
                        for pair in result['projected_points']:
                            points2d.append(tuple(pair))
                        DrawCube(points2d, draw_colors[m])
                
            # Publish the image with results overlaid
            pub_rgb_dope_points.publish(
                CvBridge().cv2_to_imgmsg(
                    np.array(im)[...,::-1], 
                    "bgr8"
                )
            )

            # Delete all existing markers
            markers = MarkerArray()
            marker = Marker()
            marker.action = Marker.DELETEALL
            markers.markers.append(marker)
            pub_markers.publish(markers)

            # Object markers
            class_id_to_name = {class_id: name for name, class_id in class_ids.items()}
            markers = MarkerArray()
            for i, det in enumerate(detection_array.detections):
                name = class_id_to_name[det.results[0].id]
                color = draw_colors[name]

                # cube marker
                marker = Marker()
                marker.header = detection_array.header
                marker.action = Marker.ADD
                marker.pose = det.bbox.center
                marker.color.r = color[0] / 255.0
                marker.color.g = color[1] / 255.0
                marker.color.b = color[2] / 255.0
                marker.color.a = 0.4
                marker.ns = "bboxes"
                marker.id = i
                marker.type = Marker.CUBE
                marker.scale = det.bbox.size
                markers.markers.append(marker)
            
            pub_markers.publish(markers)
            pub_detections.publish(detection_array)

        rate.sleep()
Code example #22
File: dope_image.py Project: SBPL-Cruz/perception
    def image_callback(self, image_msg, category_names_to_id):
        """Image callback"""

        img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8")
        # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # for debugging

        # Assumes the latest CameraInfo is cached on the instance, as in the
        # other variants of this callback
        camera_info = self.camera_info

        # Update camera matrix and distortion coefficients
        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        detection_array.header = image_msg.header

        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                loc = result["location"]
                ori = result["quaternion"]

                # transform orientation
                transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                pose_msg.header = image_msg.header
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]
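                # tf.transformations quaternions are ordered (x, y, z, w),
                # matching the geometry_msgs Quaternion field order used above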

                
                # Publish
                self.pubs[m].publish(pose_msg)
                self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points']:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                np.array(im)[..., ::-1],
                "bgr8"
            )
        )
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)

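
    # publish_markers is called above but not shown in this excerpt. A minimal
    # sketch, mirroring the marker-publishing pattern of the previous example
    # (the pub_markers publisher name is an assumption):
    def publish_markers(self, detection_array):
        # clear markers from the previous frame
        clear = MarkerArray()
        delete_all = Marker()
        delete_all.action = Marker.DELETEALL
        clear.markers.append(delete_all)
        self.pub_markers.publish(clear)

        # one semi-transparent cube per detection
        markers = MarkerArray()
        for i, det in enumerate(detection_array.detections):
            marker = Marker()
            marker.header = detection_array.header
            marker.action = Marker.ADD
            marker.type = Marker.CUBE
            marker.ns = "bboxes"
            marker.id = i
            marker.pose = det.bbox.center
            marker.scale = det.bbox.size
            marker.color.a = 0.4
            markers.markers.append(marker)
        self.pub_markers.publish(markers)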
Code example #23
	def callback(self, imageMsg):
		image = self.bridge.imgmsg_to_cv2(imageMsg, "bgr8")
		# keras-retinanet's preprocess_image expects a BGR image, so no colour
		# conversion is needed here

		# copy to draw on
		draw = image.copy()
		draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

		# Image formatting specific to Retinanet
		image = preprocess_image(image)
		image, scale = resize_image(image)

		# Run the inferencer
		try:
			with self.session.as_default():
				with self.session.graph.as_default():
					boxes, scores, labels = self.model.predict_on_batch(np.expand_dims(image, axis=0))
		except Exception as e:
			rospy.logerr(e)
			rospy.logwarn("WARNING: Has your model been converted to an inference model yet? "
				"see https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model")
			return

		# correct for image scale
		boxes /= scale

		# Construct the detection message
		header = Header(frame_id=imageMsg.header.frame_id, stamp=imageMsg.header.stamp)
		detections_message_out = Detection2DArray()
		detections_message_out.header = header
		detections_message_out.detections = []

		# visualize detections
		for box, score, label in zip(boxes[0], scores[0], labels[0]):
			# scores are sorted so we can break
			if score < self.confidence_cutoff:
				break

			# Add boxes and captions
			b = np.array(box).astype(int)
			cv2.rectangle(draw, (b[0], b[1]), (b[2], b[3]), self.color, self.thickness, cv2.LINE_AA)

			if label >= len(self.labels_to_names):
				print("WARNING: Got unknown label, using 'detection' instead")
				caption = "Detection {:.3f}".format(score)
			else:
				caption = "{} {:.3f}".format(self.labels_to_names[label], score)

			cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
			cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

			#Construct the output detection message
			bb = BoundingBox2D()
			det = Detection2D(header=header)
			hyp = ObjectHypothesisWithPose()
			center = Pose2D()
			hyp.id = int(label)      # cast numpy scalars so rospy can serialize them
			hyp.score = float(score)
			bb.size_x = b[2] - b[0]
			bb.size_y = b[3] - b[1]
			center.x = float(b[2] + b[0])/2
			center.y = float(b[3] + b[1])/2
			bb.center = center
			det.results.append(hyp)
			det.bbox = bb
			detections_message_out.detections.append(det)

		self.detections_pub.publish(detections_message_out)

		# Write out image
		image_message_out = self.bridge.cv2_to_imgmsg(draw, encoding="rgb8")
		self.img_pub.publish(image_message_out)
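
# A minimal consumer of the Detection2DArray published above might look like
# this sketch (the 'detections' topic name is an assumption):
def print_detections(msg):
	for det in msg.detections:
		for hyp in det.results:
			rospy.loginfo("class %d score %.3f at (%.1f, %.1f)",
				hyp.id, hyp.score, det.bbox.center.x, det.bbox.center.y)

# rospy.Subscriber('detections', Detection2DArray, print_detections, queue_size=1)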
Code example #24
File: detect_faces.py  Project: vventirozos/robot_ros
    def callback(self, data):
        try:
            cv_image = self.bridge.compressed_imgmsg_to_cv2(
                data, desired_encoding="passthrough")
        except CvBridgeError as e:
            rospy.logerr(e)
            return  # cv_image would be undefined beyond this point

        results = self.engine.DetectWithImage(PIL.Image.fromarray(cv_image),
                                              top_k=1,
                                              threshold=self.threshold,
                                              keep_aspect_ratio=True,
                                              relative_coord=True)
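        # relative_coord=True makes the engine return bounding boxes in
        # normalized [0, 1] coordinates, hence the per-detection scaling by
        # image width and height below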

        detections = Detection2DArray()
        now = rospy.get_rostime()

        for detection in results:

            top_left, bottom_right = detection.bounding_box
            min_x, min_y = top_left
            max_x, max_y = bottom_right
            imheight, imwidth, _ = cv_image.shape
            min_x *= imwidth
            max_x *= imwidth
            min_y *= imheight
            max_y *= imheight
            centre_x = (max_x + min_x) / 2.0
            centre_y = (max_y + min_y) / 2.0
            height = max_y - min_y
            width = max_x - min_x
            if height <= 0 or width <= 0:
                continue

            bbox = BoundingBox2D()
            bbox.center.x = centre_x
            bbox.center.y = centre_y
            bbox.size_x = width
            bbox.size_y = height

            hypothesis = ObjectHypothesisWithPose()
            #            hypothesis.id = str(detection.label_id)
            hypothesis.score = detection.score
            hypothesis.pose.pose.position.x = centre_x
            hypothesis.pose.pose.position.y = centre_y

            # update the timestamp of the object
            det_msg = Detection2D()  # 'det_msg' avoids shadowing the builtin 'object'
            det_msg.header.stamp = now
            det_msg.header.frame_id = data.header.frame_id
            det_msg.results.append(hypothesis)
            det_msg.bbox = bbox
            det_msg.source_img.header.frame_id = data.header.frame_id
            det_msg.source_img.header.stamp = now
            det_msg.source_img.height = int(height)
            det_msg.source_img.width = int(width)
            det_msg.source_img.encoding = "rgb8"
            det_msg.source_img.step = int(width * 3)
            #            det_msg.source_img.data = cv_image[int(min_y):int(max_y), int(min_x):int(max_x)].tobytes()

            detections.detections.append(det_msg)

        if len(results) > 0:
            self.detection_pub.publish(detections)

        rospy.logdebug("%.2f ms" % self.engine.get_inference_time())
Code example #25
    def object_predict(self, object_data, header, image_np, image):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = object_data[0]
        object_score = object_data[1]
        dimensions = object_data[2]

        obj.header = header
        obj_hypothesis.id = object_id
        obj_hypothesis.score = object_score
        #obj.results.append(obj_hypothesis)
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        obj.bbox.center.x = int(
            (dimensions[1] + dimensions[3]) * image_width / 2)
        obj.bbox.center.y = int(
            (dimensions[0] + dimensions[2]) * image_height / 2)

        ###################################
        # use the larger bounding-box side as the apparent marker diameter in pixels
        pixelDiametro = max(obj.bbox.size_x, obj.bbox.size_y)

        #DIAMETER_LANDMARCK_M = 0.24 OR 0.5
        metersDiametroLandmarck = self.DIAMETER_LANDMARCK_M

        #DISTANCE_FOCAL = 750
        distFocus_real = self.DISTANCE_FOCAL

        altura = float(
            (metersDiametroLandmarck * distFocus_real) / pixelDiametro)

        # rospy.loginfo("--------------------------------")
        # rospy.loginfo("Real marker diameter (m):  %f", metersDiametroLandmarck)
        # # rospy.loginfo("Real focal length:         %f", distFocus_real)
        # rospy.loginfo("Diameter (pixels):         %f", pixelDiametro)
        # rospy.loginfo("Drone altitude (m):        %f", altura)
        ###################################

        pixel_x = int((obj.bbox.center.x - (image_width / 2)) * (-1))
        pixel_y = int((obj.bbox.center.y - (image_height / 2)) * (1))

        k = float(metersDiametroLandmarck / pixelDiametro)

        obj_hypothesis.pose.pose.position.x = pixel_x * k
        obj_hypothesis.pose.pose.position.y = pixel_y * k
        obj_hypothesis.pose.pose.position.z = altura
        obj.results.append(obj_hypothesis)

        #rospy.loginfo("publish obj_hypothesis.score: %d", object_score)
        # rospy.loginfo("publish bbox.size x: %d", obj.bbox.size_x)
        # rospy.loginfo("publish bbox.size y: %d", obj.bbox.size_y)
        # rospy.loginfo("publish bbox.center x: %d", obj.bbox.center.x)
        # rospy.loginfo("publish bbox.center y: %d", obj.bbox.center.y)

        return obj
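
# Sanity check for the pinhole-model height estimate above:
# altura = (marker_diameter_m * focal_length_px) / marker_diameter_px.
# With the values quoted in the comments (0.24 m marker, 750 px focal length),
# a marker appearing 120 px wide puts the camera at 1.5 m. A hypothetical
# worked example (all values here are illustrative):
marker_diameter_m = 0.24
focal_length_px = 750.0
marker_diameter_px = 120.0
altura = (marker_diameter_m * focal_length_px) / marker_diameter_px
assert abs(altura - 1.5) < 1e-9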
Code example #26
    def callback_synchronize(self, msg_img, msg_bbox, cam_id):

        cv_img = self.cv_bridge.imgmsg_to_cv2(msg_img)
        new_bbox = Detection2DArray()

        print("aaaaa")

        for detection in msg_bbox.detections:

            box = Detection2D()
            box.bbox = detection.bbox
            pose = ObjectHypothesisWithPose()

            if detection.results[0].id == 7:  # id:7 Traffic light

                x_min = int(detection.bbox.center.x -
                            (detection.bbox.size_x / 2))
                y_min = int(detection.bbox.center.y -
                            (detection.bbox.size_y / 2))
                x_max = int(detection.bbox.center.x +
                            (detection.bbox.size_x / 2))
                y_max = int(detection.bbox.center.y +
                            (detection.bbox.size_y / 2))

                img_cropped = cv_img[y_min:y_max, x_min:x_max]

                preprocessed_img = self.preprocess_img(img=img_cropped)
                predict_f = self.model_traffic_light(preprocessed_img)
                predict = predict_f.data.max(1, keepdim=True)[1]
                pose.id = int(1000 + int(predict))
                cnn_prob = float(torch.exp(predict_f)[0][int(predict)])
                pose.score = cnn_prob
                box.results.append(pose)

                if self.validate_light_with_brightness_region(
                        img_cropped, int(predict)):
                    new_bbox.detections.append(box)

            elif detection.results[0].id == 8:  # id:8 Traffic sign

                x_min = int(detection.bbox.center.x -
                            (detection.bbox.size_x / 2))
                y_min = int(detection.bbox.center.y -
                            (detection.bbox.size_y / 2))
                x_max = int(detection.bbox.center.x +
                            (detection.bbox.size_x / 2))
                y_max = int(detection.bbox.center.y +
                            (detection.bbox.size_y / 2))

                img_cropped = cv_img[y_min:y_max, x_min:x_max]

                preprocessed_img = self.preprocess_img(img=img_cropped)
                predict_f = self.model_traffic_sign(preprocessed_img)
                predict = predict_f.data.max(1, keepdim=True)[1]
                cnn_prob = float(torch.exp(predict_f)[0][int(predict)])
                pose.score = cnn_prob
                pose.id = int(2000 + int(predict))
                box.results.append(pose)
                new_bbox.detections.append(box)

            else:
                box = detection
                new_bbox.detections.append(box)

        new_bbox.header = msg_bbox.header
        if cam_id == "cam_fm_01":
            self.pub_fm01_new_bbox.publish(new_bbox)
        elif cam_id == "cam_fr_01":
            self.pub_fr01_new_bbox.publish(new_bbox)
        elif cam_id == "cam_fl_01":
            self.pub_fl01_new_bbox.publish(new_bbox)
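
    # preprocess_img is used above but not shown. A minimal sketch, assuming a
    # torchvision pipeline that resizes the crop to the classifier's input size
    # and returns a batched tensor (the 32x32 size and the absence of
    # normalization are assumptions):
    def preprocess_img(self, img):
        import torchvision.transforms as transforms
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
        ])
        return transform(img).unsqueeze(0)  # add a batch dimension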
Code example #27
def EfficientDetNode():
    rospy.init_node('efficient_det_node', anonymous=True)
    rospy.Subscriber('input', String, image_callback, queue_size=1)
    pub = rospy.Publisher('/image_detections', Detection2DArray, queue_size=10)
    rate = rospy.Rate(1)  # 1 Hz

    path_list = os.listdir(path)
    path_list.sort(key=lambda x: int(x.split('.')[0]))

    stamp_file = open(stamp_path)
    stamp_lines = stamp_file.readlines()
    stamp_i = 0

    for filename in path_list:
        img_path = filename
        cur_frame = img_path[:-4]
        img_path = path + "/" + img_path

        cur_stamp = float(stamp_lines[stamp_i][-13:].strip('\n'))
        # cur_stamp = rospy.Time.from_sec(
        #     ((float)(stamp_lines[stamp_i][-13:].strip('\n'))))
        stamp_i += 1

        detection_results = Detection2DArray()

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
        input_size = input_sizes[
            compound_coef] if force_input_size is None else force_input_size
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

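        # NOTE: the backbone is rebuilt and its weights reloaded for every
        # image; hoisting this construction above the for-loop would avoid
        # the repeated cost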
        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{compound_coef}.pth',
                       map_location='cpu'))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        out = invert_affine(framed_metas, out)

        display(cur_frame, out, ori_imgs, imshow=False, imwrite=True)

        for i in range(len(out)):
            for j in range(len(out[i]['rois'])):
                x1, y1, x2, y2 = out[i]['rois'][j].astype(int)
                obj = obj_list[out[i]['class_ids'][j]]
                score = float(out[i]['scores'][j])

                result = ObjectHypothesisWithPose()
                result.score = score
                if (obj == 'car'):
                    result.id = 0
                if (obj == 'person'):
                    result.id = 1
                if (obj == 'cyclist'):
                    result.id = 2

                detection_msg = Detection2D()
                detection_msg.bbox.center.x = (x1 + x2) / 2
                detection_msg.bbox.center.y = (y1 + y2) / 2
                detection_msg.bbox.size_x = x2 - x1
                detection_msg.bbox.size_y = y2 - y1

                detection_msg.results.append(result)
                detection_results.detections.append(detection_msg)
                rospy.loginfo("%d: %lf", detection_msg.results[0].id,
                              detection_msg.results[0].score)

            detection_results.header.seq = int(cur_frame)
            #detection_results.header.stamp = cur_stamp
            rospy.loginfo(detection_results.header.stamp)
            pub.publish(detection_results)

            if not os.path.exists(txt_path):
                os.makedirs(txt_path)
            #with open(f'txt/{cur_frame}.txt', 'w') as f:
            with open(f'{txt_path}/{cur_frame}.txt', 'w') as f:
                #f.write(str((float)(stamp_lines[stamp_i][-13:].strip('\n'))) + "\n")
                f.write(str(cur_stamp) + "\n")
                for detection in detection_results.detections:
                    f.write(str(detection.bbox.center.x) + " ")
                    f.write(str(detection.bbox.center.y) + " ")
                    f.write(str(detection.bbox.size_x) + " ")
                    f.write(str(detection.bbox.size_y) + " ")
                    f.write(str(detection.results[0].id) + " ")
                    f.write(str(detection.results[0].score) + "\n")

            rate.sleep()

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)

            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')