Beispiel #1
0
 def forward(ctx, boxes, scores, threshold, top_k=200):
     """Run non-maximum suppression and return kept indices plus their count.

     The implementation is chosen from ``boxes.is_cuda``: boxes that are not
     on CUDA go through ``torch_nms`` with flattened scores, while CUDA boxes
     go through ``EXTENSIONS.nms``, which receives boxes and scores
     concatenated along dim 1. ``top_k`` is only forwarded to the CUDA
     extension.

     Returns:
         A pair ``(keep, count)`` where ``count`` is a 0-dim tensor holding
         ``keep.size(0)``. When ``scores`` has no rows the pair is an empty
         ``torch.int`` tensor and a zero tensor.
     """
     # Nothing to suppress: short-circuit before touching either backend.
     if scores.size(0) == 0:
         return torch.tensor([], dtype=torch.int), torch.tensor(0)

     # The concatenation on the CUDA path needs scores as a column.
     score_column = scores.unsqueeze(1) if scores.dim() == 1 else scores

     if boxes.is_cuda:
         packed = torch.cat((boxes, score_column), dim=1)
         kept = EXTENSIONS.nms(packed, threshold, top_k)
     else:
         kept = torch_nms(boxes, score_column.flatten(), threshold)

     return kept, torch.tensor(kept.size(0))
Beispiel #2
0
    def process_image(self, image_path=None, image=None, show_image=False):
        """Detect faces in one image and return an enlarged crop per face.

        Args:
            image_path: Path of the image file to read. Must not be combined
                with ``image``.
            image: Already decoded image array, indexed as
                (row, column, channel). Must not be combined with
                ``image_path``.
            show_image: When true, draw the kept boxes and their scores on a
                copy of the image and show it in an OpenCV window until a key
                is pressed.

        Returns:
            A list of ``[crop, score]`` pairs, one per detection kept after
            thresholding and NMS. ``crop`` is a slice of the input image
            covering the box enlarged by ``base_config.face_enlarge`` and
            clipped to the image; ``score`` is rounded to two decimals.

        Raises:
            ValueError: If no image could be obtained (unreadable path, or
                neither ``image_path`` nor ``image`` given).
        """
        # Release cached GPU memory that is no longer referenced.
        torch.cuda.empty_cache()
        assert image_path is None or image is None
        if image_path:
            if self.contain_zh(image_path):
                # Decode from the raw file bytes instead of cv2.imread
                # (presumably because imread cannot open such paths -- TODO
                # confirm). IMREAD_COLOR matches cv2.imread's default so both
                # branches yield the same channel layout.
                image_cv2 = cv2.imdecode(
                    np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
            else:
                image_cv2 = cv2.imread(image_path)
        else:
            image_cv2 = image
        if image_cv2 is None:
            # cv2.imread / cv2.imdecode signal failure by returning None.
            raise ValueError(
                "image could not be loaded: pass a readable image_path or an "
                "image array")

        # Pre-processing and inference; one row of 5 values per detection.
        model_input = self.preprocess(image_cv2)
        detections = self.model(model_input).view(-1, 5)

        # Scale each detection back up to the image: (w, h, w, h).
        h, w = image_cv2.shape[0:2]
        scale = torch.Tensor([w, h, w, h])
        scale = scale.cuda() if self.use_cuda else scale.cpu()
        scores = detections[..., 0]
        boxes = detections[..., 1:] * scale

        # Keep boxes scoring at least self.thresh whose last coordinate
        # exceeds 2.0.
        keep_mask = (scores >= self.thresh) & (boxes[..., -1] > 2.0)
        scores = scores[keep_mask]
        boxes = boxes[keep_mask]

        keep_idx = torch_nms(boxes, scores, iou_threshold=0.4)
        keep_boxes, keep_scores = (), ()
        if len(keep_idx) > 0:
            keep_boxes = boxes[keep_idx].cpu().numpy()
            keep_scores = scores[keep_idx].cpu().numpy()

        return_images = []
        for box, s in zip(keep_boxes, keep_scores):
            # Enlarge the detection box symmetrically around its centre.
            x_length, y_length = abs(box[0] - box[2]), abs(box[1] - box[3])
            add_x = x_length * (base_config.face_enlarge - 1) / 2
            add_y = y_length * (base_config.face_enlarge - 1) / 2
            box_large = box + [-add_x, -add_y, add_x, add_y]
            # Clip the enlarged box to the image bounds.
            box_large[0] = max(box_large[0], 0)
            box_large[1] = max(box_large[1], 0)
            box_large[2] = min(box_large[2], w)
            box_large[3] = min(box_large[3], h)
            box_large = np.array(box_large, np.int32)
            crop_face = image_cv2[box_large[1]:box_large[3],
                                  box_large[0]:box_large[2], :]
            return_images.append([crop_face, round(s, 2)])

        print("{} faces are detected in .".format(len(keep_idx)))
        if show_image:
            # Draw on a copy: the crops above are views into image_cv2, so
            # drawing on the original would paint over the returned faces
            # (and over the caller's image).
            canvas = image_cv2.copy()
            for box, s in zip(keep_boxes, keep_scores):
                # OpenCV drawing functions require integer pixel coordinates.
                x1, y1, x2, y2 = np.array(box, np.int32).tolist()
                cv2.rectangle(canvas, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
                # Filled background strip for the score label.
                cv2.rectangle(canvas, (x1, y1 - 20), (x1 + 80, y1 - 2),
                              color=(0, 255, 0),
                              thickness=-1)
                # NOTE(review): the font scale argument is self.thresh, the
                # score threshold -- confirm this is intended.
                cv2.putText(canvas,
                            "{:.2f}".format(s), (x1, y1 - 2),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            self.thresh,
                            color=(255, 255, 255),
                            thickness=2)
            cv2.imshow('image', canvas)
            cv2.waitKey(0)
        return return_images
Beispiel #3
0
            ]).cuda()
        else:
            scale = torch.Tensor([
                img_cv2.shape[1], img_cv2.shape[0], img_cv2.shape[1],
                img_cv2.shape[0]
            ]).cpu()
        scores = detections[..., 0]
        boxes = detections[..., 1:] * scale

        # filter the boxes whose score is smaller than 0.8
        keep_mask = (scores >= args.thresh) & (boxes[..., -1] > 2.0)
        scores = scores[keep_mask]
        boxes = boxes[keep_mask]
        # print(scores.max())

        keep_idx = torch_nms(boxes, scores, iou_threshold=0.4)
        if len(keep_idx) > 0:
            keep_boxes = boxes[keep_idx].cpu().numpy()
            keep_scores = scores[keep_idx].cpu().numpy()

            for box, s in zip(keep_boxes, keep_scores):
                box = np.array(box, np.int32)
                cv2.rectangle(img_cv2, (box[0], box[1]), (box[2], box[3]),
                              color=(0, 0, 255),
                              thickness=2)
                cv2.rectangle(img_cv2, (box[0], box[1] - 20),
                              (box[0] + 80, box[1] - 2),
                              color=(0, 255, 0),
                              thickness=-1)
                cv2.putText(img_cv2,
                            "{:.2f}".format(s), (box[0], box[1] - 2),
    def get_bounding_boxes(self, x, encoding=None, targets=None):
        """Decode YOLO outputs into per-item corner boxes after per-class NMS.

        Args:
            x: Input handed to ``self.encode_yolo`` when ``encoding`` is None.
            encoding: Optional precomputed encoding; when given, ``x`` is not
                encoded again.
            targets: Optional targets; when given, ``self.yolo_loss`` is
                evaluated on the decoder outputs.

        Returns:
            ``(boxes, loss)`` where ``boxes`` is a tuple with one tensor of
            shape ``(n, 2, 4)`` per element of the decoder output (index 0 of
            the middle axis holds x coordinates, index 1 holds y coordinates,
            last axis the four corners) and ``loss`` is the YOLO loss, or
            ``0`` when no targets were supplied.
        """
        from torchvision.ops import nms as torch_nms

        if encoding is None:
            encoding = self.encode_yolo(x)

        outputs = self.yolo_decoder(encoding)

        yolo_loss_value = 0
        if targets is not None:
            yolo_loss_value = self.yolo_loss(outputs, targets)

        boxes = []

        for output in outputs:
            # Decode boxes, labels and confidences; the per-class scores are
            # not needed to build the returned boxes.
            boxes_normalized_all, class_labels_all, confidences_all, _ = pred_decode(
                output,
                prob_thresh=self.prob_thresh,
                conf_thresh=self.conf_thresh,
            )
            if boxes_normalized_all.size(0) == 0:
                # Nothing detected: emit a placeholder of the original shape,
                # zero-filled so it never exposes uninitialised memory.
                # NOTE(review): confirm downstream really expects
                # outputs.shape[0] placeholder boxes here.
                boxes.append(FloatTensor(outputs.shape[0], 2, 4).zero_())
                continue

            # Apply non-maximum suppression separately for each class.
            kept_per_class = []
            for class_label in range(self.num_classes):
                mask = (class_labels_all == class_label)
                if not mask.any():
                    continue  # no box predicted for this class
                class_boxes = boxes_normalized_all[mask]
                ids = torch_nms(class_boxes, confidences_all[mask],
                                self.nms_thresh)
                kept_per_class.append(class_boxes[ids])

            if not kept_per_class:
                # No label fell inside range(num_classes); torch.cat would
                # fail on an empty list, so treat it like "nothing detected".
                boxes.append(FloatTensor(outputs.shape[0], 2, 4).zero_())
                continue

            boxes_normalized = torch.cat(kept_per_class, 0)

            center_x = (boxes_normalized[:, 0] +
                        boxes_normalized[:, 2]) / 2 * WIDTH
            center_y = (boxes_normalized[:, 1] +
                        boxes_normalized[:, 3]) / 2 * HEIGHT
            width = (boxes_normalized[:, 2] - boxes_normalized[:, 0]) * WIDTH
            height = (boxes_normalized[:, 3] - boxes_normalized[:, 1]) * HEIGHT

            x_lo = center_x - width / 2
            x_hi = center_x + width / 2
            y_lo = center_y - height / 2
            y_hi = center_y + height / 2

            # Corner order along the last axis:
            # (x_lo, y_lo), (x_lo, y_hi), (x_hi, y_lo), (x_hi, y_hi).
            better_coordinates = FloatTensor(boxes_normalized.shape[0], 2, 4)
            better_coordinates[:, 0, :] = torch.stack(
                (x_lo, x_lo, x_hi, x_hi), dim=1)
            better_coordinates[:, 1, :] = torch.stack(
                (y_lo, y_hi, y_lo, y_hi), dim=1)

            # Flip the y axis, then shift x by -40 and y by +40.
            better_coordinates[:, 1, :].mul_(-1)
            better_coordinates[:, 0, :].add_(-40)
            better_coordinates[:, 1, :].add_(40)

            # Limit every coordinate to the [-40, 40] range.
            better_coordinates.clamp_(-40, 40)

            boxes.append(better_coordinates)

        return tuple(boxes), yolo_loss_value