예제 #1
0
    def __call__(self, image: pintu.imaging.Image) -> List[Dict]:
        """
        Perform object detection inference on an image

        :param image:
            Image to detect objects in.  Its ``data``, ``width`` and
            ``height`` attributes are read.

        :return:
            List of one dictionary per detected object (empty when nothing
            passes ``self.prob_threshold``), each dictionary containing the
            following fields:

            - "class"       : (str)     The name of the object detected.
            - "confidence"  : (float)   The confidence in the detection.
            - "left"        : (float)   Absolute x-coordinate of the left edge.
            - "top"         : (float)   Absolute y-coordinate of the top edge.
            - "right"       : (float)   Absolute x-coordinate of the right edge.
            - "bottom"      : (float)   Absolute y-coordinate of the bottom edge.

            Coordinates are expressed in pixels of the original ``image`` and
            are clamped to ``[0, image.width]`` / ``[0, image.height]``.
        """
        # Resize so the longer side matches the network input while keeping
        # the aspect ratio; `scale` maps original pixels to resized pixels.
        if image.width > image.height:
            scale = float(self.input_shape[1]) / image.width
            w = self.input_shape[1]
            h = int(image.height * scale)
        else:
            scale = float(self.input_shape[0]) / image.height
            h = self.input_shape[0]
            w = int(image.width * scale)

        mat_in = ncnn.Mat.from_pixels_resize(
            image.data,
            ncnn.Mat.PixelType.PIXEL_BGR,
            image.width,
            image.height,
            w,
            h,
        )

        # Pad each dimension up to the next multiple of 32.  The padding is
        # split between both sides; the leading share (top / left) is kept so
        # the very same offset can be removed from the decoded boxes later.
        wpad = (w + 31) // 32 * 32 - w
        hpad = (h + 31) // 32 * 32 - h
        pad_top = hpad // 2
        pad_left = wpad // 2
        mat_in_pad = ncnn.copy_make_border(
            mat_in,
            pad_top,
            hpad - pad_top,
            pad_left,
            wpad - pad_left,
            ncnn.BorderType.BORDER_CONSTANT,
            0,
        )

        # Normalize image
        mat_in_pad.substract_mean_normalize(self.mean_vals, self.norm_vals)

        ex = self.net.create_extractor()
        ex.input(self.input_name, mat_in_pad)

        # One score blob and one box blob per stride.  The column counts
        # (80 scores, 32 box values) are fixed by the reshapes below.
        # NOTE(review): 32 presumably equals 4 * (self.reg_max + 1) - confirm.
        scores = [ex.extract(x)[1] for x in self.score_output_names]
        scores = [numpy.reshape(x, (-1, 80)) for x in scores]

        raw_boxes = [ex.extract(x)[1] for x in self.boxes_output_names]
        raw_boxes = [numpy.reshape(x, (-1, 32)) for x in raw_boxes]

        decode_boxes = []
        select_scores = []
        for stride, box_distribute, score in zip(self.strides, raw_boxes,
                                                 scores):
            # Feature-map size for this stride.  The number of grid cells is
            # the number of score ROWS (shape[0]); shape[1] is the class
            # count and must not be used here.
            if mat_in_pad.w > mat_in_pad.h:
                fm_w = mat_in_pad.w // stride
                fm_h = score.shape[0] // fm_w
            else:
                fm_h = mat_in_pad.h // stride
                fm_w = score.shape[0] // fm_h

            # Cell centres in padded-input pixels, row-major, laid out as
            # (x, y, x, y) so they line up with (left, top, right, bottom).
            ww, hh = numpy.meshgrid(numpy.arange(fm_w), numpy.arange(fm_h))
            ct_row = (hh.flatten() + 0.5) * stride
            ct_col = (ww.flatten() + 0.5) * stride
            center = numpy.stack((ct_col, ct_row, ct_col, ct_row), axis=1)

            # Turn each (reg_max + 1)-bin distribution into its expected
            # value, giving four side distances per cell, scaled by stride.
            reg_range = numpy.arange(self.reg_max + 1)
            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
            box_distance = ncnn.utils.functional.softmax(box_distance)
            box_distance = box_distance * numpy.expand_dims(reg_range, axis=0)
            box_distance = numpy.sum(box_distance, axis=1).reshape((-1, 4))
            box_distance = box_distance * stride

            # Keep only the cells with the highest best-class score.
            topk_idx = numpy.argsort(score.max(axis=1))[::-1]
            topk_idx = topk_idx[:self.num_candidate]
            center = center[topk_idx]
            score = score[topk_idx]
            box_distance = box_distance[topk_idx]

            # Left/top distances are subtracted from the centre, right/bottom
            # distances are added.
            decode_box = center + numpy.array([-1, -1, 1, 1]) * box_distance

            select_scores.append(score)
            decode_boxes.append(decode_box)

        # Per-class non-maximum suppression.
        bboxes = numpy.concatenate(decode_boxes, axis=0)
        confidences = numpy.concatenate(select_scores, axis=0)
        picked_box = []
        picked_probs = []
        picked_labels = []
        for class_index in range(confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > self.prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = bboxes[mask, :]
            picked = ncnn.utils.functional.nms(
                subset_boxes,
                probs,
                iou_threshold=self.nms_threshold,
                top_k=self.top_k,
            )

            picked_box.append(subset_boxes[picked])
            picked_probs.append(probs[picked])
            picked_labels.extend([class_index] * len(picked))

        if not picked_box:
            return []

        final_boxes = numpy.concatenate(picked_box)
        final_probs = numpy.concatenate(picked_probs)

        # Map boxes from padded network-input space back to the original
        # image: remove the leading padding (horizontal for x, vertical for
        # y), undo the resize, then clamp to the image bounds.
        offsets = numpy.array([pad_left, pad_top, pad_left, pad_top])
        limits = numpy.array(
            [image.width, image.height, image.width, image.height])
        final_boxes = numpy.clip((final_boxes - offsets) / scale, 0, limits)

        return [{
            "class": str(self.class_names[label]),
            "confidence": float(score),
            "left": float(bbox[0]),
            "top": float(bbox[1]),
            "right": float(bbox[2]),
            "bottom": float(bbox[3]),
        } for label, score, bbox in zip(picked_labels, final_probs,
                                        final_boxes)]
예제 #2
0
파일: yolov5.py 프로젝트: zzzzt634/ncnn
    def __call__(self, img):
        """
        Run YOLOv5 detection on an image and return the detected objects.

        :param img:
            Image array indexed as ``(height, width, ...)``; it is handed to
            ncnn with the ``PIXEL_BGR2RGB`` pixel type.

        :return:
            List of ``Detect_Object`` instances, one per detection kept by
            ``non_max_suppression``.  Each is built from
            ``(obj[5], obj[4], x, y, width, height)`` with the box expressed
            in pixels of the original ``img``.
        """
        img_w = img.shape[1]
        img_h = img.shape[0]

        # Resize so the longer side equals target_size, keeping aspect ratio;
        # `scale` maps original pixels to resized pixels.
        if img_w > img_h:
            scale = float(self.target_size) / img_w
            w = self.target_size
            h = int(img_h * scale)
        else:
            scale = float(self.target_size) / img_h
            h = self.target_size
            w = int(img_w * scale)

        mat_in = ncnn.Mat.from_pixels_resize(img,
                                             ncnn.Mat.PixelType.PIXEL_BGR2RGB,
                                             img_w, img_h, w, h)
        # pad to target_size rectangle
        # yolov5/utils/datasets.py letterbox
        # Each dimension is padded up to the next multiple of 32; the leading
        # share (top / left) is kept so it can be removed from the boxes.
        wpad = (w + 31) // 32 * 32 - w
        hpad = (h + 31) // 32 * 32 - h
        pad_top = hpad // 2
        pad_left = wpad // 2
        mat_in_pad = ncnn.copy_make_border(
            mat_in,
            pad_top,
            hpad - pad_top,
            pad_left,
            wpad - pad_left,
            ncnn.BorderType.BORDER_CONSTANT,
            114.0,
        )

        mat_in_pad.substract_mean_normalize(self.mean_vals, self.norm_vals)

        ex = self.net.create_extractor()
        ex.input("images", mat_in_pad)

        # anchor setting from yolov5/models/yolov5s.yaml
        # The status codes returned by extract() are not inspected.
        _, mat_out1 = ex.extract("output")  # stride 8
        _, mat_out2 = ex.extract("781")  # stride 16
        _, mat_out3 = ex.extract("801")  # stride 32

        # Ordered coarse-to-fine; self.stride, self.grid and self.anchor_grid
        # are indexed in this same order.
        pred = [np.array(mat_out3), np.array(mat_out2), np.array(mat_out1)]
        z = []
        for i, out in enumerate(pred):
            stride = self.stride[i]
            num_grid = out.shape[1]
            if mat_in_pad.w > mat_in_pad.h:
                num_grid_x = mat_in_pad.w // stride
                num_grid_y = num_grid // num_grid_x
            else:
                num_grid_y = mat_in_pad.h // stride
                num_grid_x = num_grid // num_grid_y
            # NOTE(review): make_grid is defined elsewhere.  If it returns an
            # array with leading singleton axes, this shape[0]/shape[1]
            # comparison never matches and the grid is rebuilt on every call
            # (harmless but wasteful) - confirm against make_grid.
            if (self.grid[i].shape[0] != num_grid_x
                    or self.grid[i].shape[1] != num_grid_y):
                self.grid[i] = make_grid(num_grid_x, num_grid_y)

            y = sigmoid(out)
            y = y.reshape(out.shape[0], num_grid_y, num_grid_x, out.shape[2])
            y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 +
                           self.grid[i]) * stride  # xy
            y[..., 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i]  # wh
            z.append(y.reshape(1, -1, y.shape[-1]))
        pred = np.concatenate(z, 1)

        result = non_max_suppression(pred, self.prob_threshold,
                                     self.nms_threshold)[0]
        # NOTE(review): non_max_suppression is defined elsewhere; some
        # implementations yield None for an image without detections, which
        # would otherwise fail in the iteration below.
        if result is None:
            return []

        # Boxes are in padded network-input pixels: remove the letterbox
        # offset before undoing the resize so they land on the original
        # image.  Width and height are differences, so only the scale applies.
        objects = [
            Detect_Object(
                obj[5],
                obj[4],
                (obj[0] - pad_left) / scale,
                (obj[1] - pad_top) / scale,
                (obj[2] - obj[0]) / scale,
                (obj[3] - obj[1]) / scale,
            ) for obj in result
        ]

        return objects
예제 #3
0
    def __call__(self, img):
        """
        Run NanoDet-style detection on an image and return the detections.

        :param img:
            Image array indexed as ``(height, width, ...)``; it is handed to
            ncnn with the ``PIXEL_BGR`` pixel type.

        :return:
            List of ``Detect_Object`` instances built from
            ``(label, score, x, y, width, height)``, or an empty list when no
            score exceeds ``self.prob_threshold``.  Boxes are expressed in
            pixels of the original ``img`` and clipped to its bounds.
        """
        img_w = img.shape[1]
        img_h = img.shape[0]

        # Resize so the longer side equals target_size, keeping aspect ratio;
        # `scale` maps original pixels to resized pixels.
        if img_w > img_h:
            scale = float(self.target_size) / img_w
            w = self.target_size
            h = int(img_h * scale)
        else:
            scale = float(self.target_size) / img_h
            h = self.target_size
            w = int(img_w * scale)

        mat_in = ncnn.Mat.from_pixels_resize(img, ncnn.Mat.PixelType.PIXEL_BGR,
                                             img_w, img_h, w, h)

        # pad to target_size rectangle
        # Each dimension is padded up to the next multiple of 32; the leading
        # share (top / left) is kept so it can be removed from the boxes.
        wpad = (w + 31) // 32 * 32 - w
        hpad = (h + 31) // 32 * 32 - h
        pad_top = hpad // 2
        pad_left = wpad // 2
        mat_in_pad = ncnn.copy_make_border(
            mat_in,
            pad_top,
            hpad - pad_top,
            pad_left,
            wpad - pad_left,
            ncnn.BorderType.BORDER_CONSTANT,
            0,
        )

        mat_in_pad.substract_mean_normalize(self.mean_vals, self.norm_vals)

        ex = self.net.create_extractor()
        ex.input("input.1", mat_in_pad)

        # One score blob and one box blob per stride.  The column counts
        # (80 scores, 32 box values) are fixed by the reshapes below.
        # NOTE(review): 32 presumably equals 4 * (self.reg_max + 1) - confirm.
        score_out_name = ["792", "814", "836"]
        scores = [ex.extract(x)[1] for x in score_out_name]
        scores = [np.reshape(x, (-1, 80)) for x in scores]

        boxes_out_name = ["795", "817", "839"]
        raw_boxes = [ex.extract(x)[1] for x in boxes_out_name]
        raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]

        # generate centers
        decode_boxes = []
        select_scores = []
        for stride, box_distribute, score in zip(self.strides, raw_boxes,
                                                 scores):
            # Feature-map size for this stride.  The number of grid cells is
            # the number of score ROWS (shape[0]); shape[1] is the class
            # count and must not be used here.
            if mat_in_pad.w > mat_in_pad.h:
                fm_w = mat_in_pad.w // stride
                fm_h = score.shape[0] // fm_w
            else:
                fm_h = mat_in_pad.h // stride
                fm_w = score.shape[0] // fm_h

            # Cell centres in padded-input pixels, row-major, laid out as
            # (x, y, x, y) so they line up with (left, top, right, bottom).
            ww, hh = np.meshgrid(np.arange(fm_w), np.arange(fm_h))
            ct_row = (hh.flatten() + 0.5) * stride
            ct_col = (ww.flatten() + 0.5) * stride
            center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)

            # box distribution to distance: expected value of each
            # (reg_max + 1)-bin distribution, scaled by the stride.
            reg_range = np.arange(self.reg_max + 1)
            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
            box_distance = softmax(box_distance)
            box_distance = box_distance * np.expand_dims(reg_range, axis=0)
            box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
            box_distance = box_distance * stride

            # top K candidate
            topk_idx = np.argsort(score.max(axis=1))[::-1]
            topk_idx = topk_idx[:self.num_candidate]
            center = center[topk_idx]
            score = score[topk_idx]
            box_distance = box_distance[topk_idx]

            # decode box: left/top distances are subtracted from the centre,
            # right/bottom distances are added.
            decode_box = center + np.array([-1, -1, 1, 1]) * box_distance

            select_scores.append(score)
            decode_boxes.append(decode_box)

        # nms, applied independently per class
        bboxes = np.concatenate(decode_boxes, axis=0)
        confidences = np.concatenate(select_scores, axis=0)
        picked_box = []
        picked_probs = []
        picked_labels = []
        for class_index in range(confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > self.prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = bboxes[mask, :]
            picked = nms(
                subset_boxes,
                probs,
                iou_threshold=self.nms_threshold,
                top_k=self.top_k,
            )
            picked_box.append(subset_boxes[picked])
            picked_probs.append(probs[picked])
            picked_labels.extend([class_index] * len(picked))

        if not picked_box:
            return []

        final_boxes = np.concatenate(picked_box)
        final_probs = np.concatenate(picked_probs)

        # result with clip: remove the leading padding, undo the resize and
        # clamp every corner to the original image BEFORE deriving width and
        # height, so a clamped corner never inflates the box size.
        offsets = np.array([pad_left, pad_top, pad_left, pad_top])
        limits = np.array([img_w, img_h, img_w, img_h])
        final_boxes = np.clip((final_boxes - offsets) / scale, 0, limits)

        objects = [
            Detect_Object(
                label,
                score,
                bbox[0],
                bbox[1],
                bbox[2] - bbox[0],
                bbox[3] - bbox[1],
            ) for label, score, bbox in zip(picked_labels, final_probs,
                                            final_boxes)
        ]

        return objects