Example #1
    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        loc, conf, land = self.model(x)

        conf = F.softmax(conf, dim=-1)

        boxes = decode(loc.data[0], self.prior_box, self.variance)

        boxes *= self.scale_bboxes
        scores = conf[0][:, 1]

        landmarks = decode_landm(land.data[0], self.prior_box, self.variance)
        landmarks *= self.scale_landmarks

        # ignore low scores
        valid_index = torch.where(scores > self.confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # do NMS
        keep = nms(boxes, scores, self.nms_threshold)
        boxes = boxes[keep, :]

        landmarks = landmarks[keep]
        scores = scores[keep]
        return boxes, scores, landmarks
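A minimal usage sketch for the forward method above, assuming the surrounding module was constructed with its prior_box, variance, scale_bboxes, scale_landmarks, confidence_threshold and nms_threshold already set. The detector instance, the image path and the fixed 640x640 input size are illustration-only assumptions, not part of the snippet.

import cv2
import torch

# Hypothetical usage: `detector` is an instance of the module defining forward() above.
image = cv2.cvtColor(cv2.imread("face.jpg"), cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (640, 640))  # assumed fixed input size matching the priors

tensor = torch.from_numpy(image).permute(2, 0, 1).float().unsqueeze(0)

with torch.no_grad():
    boxes, scores, landmarks = detector(tensor)  # dispatches to forward()

print(boxes.shape, scores.shape, landmarks.shape)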
Example #2
    def predict_jsons(
        self, image: np.ndarray, confidence_threshold: float = 0.7, nms_threshold: float = 0.4
    ) -> List[Dict[str, Union[List, float]]]:
        with torch.no_grad():
            original_height, original_width = image.shape[:2]

            scale_landmarks = torch.from_numpy(np.tile([self.max_size, self.max_size], 5)).to(self.device)
            scale_bboxes = torch.from_numpy(np.tile([self.max_size, self.max_size], 2)).to(self.device)

            transformed_image = self.transform(image=image)["image"]

            padded = pad_to_size(target_size=(self.max_size, self.max_size), image=transformed_image)

            pads = padded["pads"]

            torched_image = tensor_from_rgb_image(padded["image"]).to(self.device)

            loc, conf, land = self.model(torched_image.unsqueeze(0))

            conf = F.softmax(conf, dim=-1)

            annotations: List[Dict[str, Union[List, float]]] = []

            boxes = decode(loc.data[0], self.prior_box, self.variance)

            boxes *= scale_bboxes
            scores = conf[0][:, 1]

            landmarks = decode_landm(land.data[0], self.prior_box, self.variance)
            landmarks *= scale_landmarks

            # ignore low scores
            valid_index = torch.where(scores > confidence_threshold)[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            # Sort from high to low
            order = scores.argsort(descending=True)
            boxes = boxes[order]
            landmarks = landmarks[order]
            scores = scores[order]

            # do NMS
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep, :].int()

            if boxes.shape[0] == 0:
                return [{"bbox": [], "score": -1, "landmarks": []}]

            landmarks = landmarks[keep]

            scores = scores[keep].cpu().numpy().astype(np.float64)
            boxes = boxes.cpu().numpy()
            landmarks = landmarks.cpu().numpy()
            landmarks = landmarks.reshape([-1, 2])

            unpadded = unpad_from_size(pads, bboxes=boxes, keypoints=landmarks)

            resize_coeff = max(original_height, original_width) / self.max_size

            boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
            landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

            for box_id, bbox in enumerate(boxes):
                x_min, y_min, x_max, y_max = bbox

                x_min = np.clip(x_min, 0, original_width - 1)
                x_max = np.clip(x_max, x_min + 1, original_width - 1)

                if x_min >= x_max:
                    continue

                y_min = np.clip(y_min, 0, original_height - 1)
                y_max = np.clip(y_max, y_min + 1, original_height - 1)

                if y_min >= y_max:
                    continue

                annotations += [
                    {
                        "bbox": bbox.tolist(),
                        "score": scores[box_id],
                        "landmarks": landmarks[box_id].reshape(-1, 2).tolist(),
                    }
                ]

            return annotations
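A hedged sketch of calling predict_jsons: the face_detector variable and the image file are assumptions; grounded in the method itself is that it takes an RGB numpy image plus the two thresholds and returns a list of dicts, with a single sentinel entry (score == -1) when nothing survives NMS.

import cv2

# Hypothetical usage: `face_detector` is an instance of the class defining predict_jsons above.
image = cv2.cvtColor(cv2.imread("people.jpg"), cv2.COLOR_BGR2RGB)
annotations = face_detector.predict_jsons(image, confidence_threshold=0.7, nms_threshold=0.4)

for face in annotations:
    if face["score"] == -1:  # sentinel entry returned when no box survives NMS
        continue
    x_min, y_min, x_max, y_max = face["bbox"]
    print(face["score"], (x_min, y_min, x_max, y_max), face["landmarks"])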
Example #3
    def predict_jsons(
            self,
            image: np.ndarray,
            confidence_threshold: float = 0.7,
            nms_threshold: float = 0.4) -> List[Dict[str, Union[List, float]]]:
        with torch.no_grad():
            original_height, original_width = image.shape[:2]

            transformed_image = self.transform(image=image)["image"]

            transformed_height, transformed_width = transformed_image.shape[:2]
            transformed_size = (transformed_width, transformed_height)

            scale_landmarks = torch.from_numpy(np.tile(transformed_size,
                                                       5)).to(self.device)
            scale_bboxes = torch.from_numpy(np.tile(transformed_size,
                                                    2)).to(self.device)

            prior_box = priorbox(
                min_sizes=[[16, 32], [64, 128], [256, 512]],
                steps=[8, 16, 32],
                clip=False,
                image_size=transformed_image.shape[:2],
            ).to(self.device)

            torched_image = tensor_from_rgb_image(transformed_image).to(
                self.device)

            loc, conf, land = self.model(torched_image.unsqueeze(0))  # pylint: disable=E1102

            conf = F.softmax(conf, dim=-1)

            annotations: List[Dict[str, Union[List, float]]] = []

            boxes = decode(loc.data[0], prior_box, self.variance)

            boxes *= scale_bboxes
            scores = conf[0][:, 1]

            landmarks = decode_landm(land.data[0], prior_box, self.variance)
            landmarks *= scale_landmarks

            # ignore low scores
            valid_index = torch.where(scores > confidence_threshold)[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            # do NMS
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep, :]

            if boxes.shape[0] == 0:
                return [{"bbox": [], "score": -1, "landmarks": []}]

            landmarks = landmarks[keep]

            scores = scores[keep].cpu().numpy().astype(float)

            boxes_np = boxes.cpu().numpy()
            landmarks_np = landmarks.cpu().numpy()
            resize_coeff = original_height / transformed_height

            boxes_np *= resize_coeff
            landmarks_np = landmarks_np.reshape(-1, 10) * resize_coeff

            for box_id, bbox in enumerate(boxes_np):
                x_min, y_min, x_max, y_max = bbox

                x_min = np.clip(x_min, 0, original_width - 1)
                x_max = np.clip(x_max, x_min + 1, original_width - 1)

                if x_min >= x_max:
                    continue

                y_min = np.clip(y_min, 0, original_height - 1)
                y_max = np.clip(y_max, y_min + 1, original_height - 1)

                if y_min >= y_max:
                    continue

                annotations += [{
                    "bbox":
                    np.round(bbox.astype(float), ROUNDING_DIGITS).tolist(),
                    "score":
                    np.round(scores, ROUNDING_DIGITS)[box_id],
                    "landmarks":
                    np.round(landmarks_np[box_id].astype(float),
                             ROUNDING_DIGITS).reshape(-1, 2).tolist(),
                }]

            return annotations
Example #4
def process_predictions(prediction, original_shapes, input_shape, pads,
                        confidence_threshold, nms_threshold, prior_box,
                        variance):
    loc, conf, land = prediction

    conf = F.softmax(conf, dim=-1)

    result: List[List[Dict[str, Union[List, float]]]] = []

    batch_size, _, image_height, image_width = input_shape

    scale1 = torch.from_numpy(np.tile([image_width, image_height],
                                      5)).to(loc.device)
    scale = torch.from_numpy(np.tile([image_width, image_height],
                                     2)).to(loc.device)

    for batch_id in range(batch_size):
        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[batch_id], prior_box.to(loc.device), variance)

        boxes *= scale
        scores = conf[batch_id][:, 1]

        landmarks = decode_landm(land.data[batch_id],
                                 prior_box.to(land.device), variance)
        landmarks *= scale1

        # ignore low scores
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # Sort from high to low
        order = scores.argsort(descending=True)

        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # do NMS
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :].int()

        if boxes.shape[0] == 0:
            result += [[{"bbox": [], "score": -1, "landmarks": []}]]
            continue

        landmarks = landmarks[keep]
        scores = scores[keep].cpu().numpy().astype(np.float64)

        boxes = boxes.cpu().numpy()
        landmarks = landmarks.cpu().numpy().reshape([-1, 2])

        if pads is None:
            pads_numpy = np.array([0, 0, 0, 0])
        else:
            pads_numpy = pads[batch_id]

        unpadded = unpad_from_size(pads_numpy,
                                   bboxes=boxes,
                                   keypoints=landmarks)

        resize_coeff = max(original_shapes[batch_id]) / max(
            image_height, image_width)

        boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
        landmarks = (unpadded["keypoints"].reshape(-1, 10) *
                     resize_coeff).astype(int)

        for crop_id, bbox in enumerate(boxes):
            annotations += [{
                "bbox":
                bbox.tolist(),
                "score":
                scores[crop_id],
                "landmarks":
                landmarks[crop_id].reshape(-1, 2).tolist(),
            }]

        result += [annotations]

    return result
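The nested return value of process_predictions is easy to misread, so here is a hedged sketch of one way to call it and walk the result. The concrete shapes, the prior_box variable and the raw (loc, conf, land) outputs are assumptions standing in for whatever the surrounding inference loop provides.

# Hypothetical usage: loc, conf, land are the raw network outputs for a batch of
# one 640x640 padded image, and prior_box holds the matching anchors.
result = process_predictions(
    prediction=(loc, conf, land),
    original_shapes=[(720, 1280)],   # (height, width) of each original image
    input_shape=(1, 3, 640, 640),    # shape of the padded tensor fed to the network
    pads=None,                       # None is treated as zero padding per image
    confidence_threshold=0.7,
    nms_threshold=0.4,
    prior_box=prior_box,
    variance=[0.1, 0.2],
)

for faces in result:                 # one list per image in the batch
    for face in faces:
        if face["score"] == -1:      # sentinel when nothing survived filtering
            continue
        print(face["bbox"], face["score"], face["landmarks"])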
Example #5
    def test_step(self, batch: Dict[str, torch.Tensor],
                  batch_idx: int) -> None:
        torched_images = batch["torched_image"]
        resizes = batch["resize"]
        image_paths = batch["image_path"]
        raw_images = batch["raw_image"]

        labels: List[Dict[str, Any]] = []

        loc, conf, land = self.model(torched_images)
        conf = F.softmax(conf, dim=-1)

        batch_size = torched_images.shape[0]

        image_height, image_width = torched_images.shape[2:]

        scale1 = torch.from_numpy(np.tile([image_width, image_height],
                                          5)).to(self.device)
        scale = torch.from_numpy(np.tile([image_width, image_height],
                                         2)).to(self.device)

        priors = object_from_dict(hparams["prior_box"],
                                  image_size=(image_height,
                                              image_width)).to(loc.device)

        for batch_id in range(batch_size):
            image_path = image_paths[batch_id]
            file_id = Path(str(image_path)).stem
            raw_image = raw_images[batch_id]

            resize = resizes[batch_id].float()

            boxes = decode(loc.data[batch_id], priors,
                           hparams["test_parameters"]["variance"])

            boxes *= scale / resize
            scores = conf[batch_id][:, 1]

            landmarks = decode_landm(land.data[batch_id], priors,
                                     hparams["test_parameters"]["variance"])
            landmarks *= scale1 / resize

            # ignore low scores
            valid_index = torch.where(
                scores > self.hparams["confidence_threshold"])[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            order = scores.argsort(descending=True)

            boxes = boxes[order]
            landmarks = landmarks[order]
            scores = scores[order]

            # do NMS
            keep = nms(boxes, scores, self.hparams["nms_threshold"])
            boxes = boxes[keep, :].int()

            if boxes.shape[0] == 0:
                continue

            landmarks = landmarks[keep].int()
            scores = scores[keep].cpu().numpy().astype(np.float64)

            boxes = boxes[:self.hparams["keep_top_k"]]
            landmarks = landmarks[:self.hparams["keep_top_k"]]
            scores = scores[:self.hparams["keep_top_k"]]

            if self.hparams["visualize"]:
                vis_image = raw_image.cpu().numpy().copy()

                for crop_id, bbox in enumerate(boxes):
                    landms = landmarks[crop_id].cpu().numpy().reshape([5, 2])

                    colors = [(255, 0, 0), (128, 255, 0), (255, 178, 102),
                              (102, 128, 255), (0, 255, 255)]
                    for i, (x, y) in enumerate(landms):
                        vis_image = cv2.circle(vis_image, (x, y),
                                               radius=3,
                                               color=colors[i],
                                               thickness=3)

                    x_min, y_min, x_max, y_max = bbox.cpu().numpy()

                    x_min = np.clip(x_min, 0, x_max - 1)
                    y_min = np.clip(y_min, 0, y_max - 1)

                    vis_image = cv2.rectangle(vis_image, (x_min, y_min),
                                              (x_max, y_max),
                                              color=(0, 255, 0),
                                              thickness=2)

                    cv2.imwrite(str(self.output_vis_path / f"{file_id}.jpg"),
                                cv2.cvtColor(vis_image, cv2.COLOR_BGR2RGB))

            for crop_id, bbox in enumerate(boxes):
                bbox = bbox.cpu().numpy()

                labels += [{
                    "crop_id": crop_id,
                    "bbox": bbox.tolist(),
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].tolist(),
                }]

            result = {
                "file_path": image_path,
                "file_id": file_id,
                "bboxes": labels
            }

            with open(self.output_label_path / f"{file_id}.json", "w") as f:
                json.dump(result, f, indent=2)
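Since test_step writes one JSON file per image, a short hedged sketch of reading such a file back may help; the directory name and file id below are placeholders, while the key layout mirrors the result dict built above.

import json
from pathlib import Path

# Hypothetical path; output_label_path and the file id come from the step above.
with open(Path("labels") / "some_file_id.json") as f:
    result = json.load(f)

# result mirrors the dict written in test_step:
# {"file_path": ..., "file_id": ..., "bboxes": [{"crop_id", "bbox", "score", "landmarks"}, ...]}
for face in result["bboxes"]:
    print(face["crop_id"], face["score"], face["bbox"], face["landmarks"])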
Example #6
    def validation_step(self, batch: Dict[str, torch.Tensor],
                        batch_idx: int):  # type: ignore
        images = batch["image"]

        image_height = images.shape[2]
        image_width = images.shape[3]

        annotations = batch["annotation"]
        file_names = batch["file_name"]

        out = self.forward(images)

        location, confidence, _ = out

        confidence = F.softmax(confidence, dim=-1)
        batch_size = location.shape[0]

        predictions_coco: List[Dict[str, Any]] = []

        scale = torch.from_numpy(np.tile([image_width, image_height],
                                         2)).to(location.device)

        for batch_id in range(batch_size):
            boxes = decode(location.data[batch_id],
                           self.prior_box.to(images.device),
                           self.config.test_parameters.variance)
            scores = confidence[batch_id][:, 1]

            valid_index = torch.where(scores > 0.1)[0]
            boxes = boxes[valid_index]
            scores = scores[valid_index]

            boxes *= scale

            # do NMS
            keep = nms(boxes, scores, self.config.val_parameters.iou_threshold)
            boxes = boxes[keep, :].cpu().numpy()

            if boxes.shape[0] == 0:
                continue

            scores = scores[keep].cpu().numpy()

            file_name = file_names[batch_id]

            for box_id, bbox in enumerate(boxes):
                x_min, y_min, x_max, y_max = bbox

                x_min = np.clip(x_min, 0, x_max - 1)
                y_min = np.clip(y_min, 0, y_max - 1)

                predictions_coco += [{
                    "id":
                    str(hash(f"{file_name}_{box_id}")),
                    "image_id":
                    file_name,
                    "category_id":
                    1,
                    "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                    "score":
                    scores[box_id],
                }]

        gt_coco: List[Dict[str, Any]] = []

        for batch_id, annotation_list in enumerate(annotations):
            for annotation in annotation_list:
                x_min, y_min, x_max, y_max = annotation[:4]
                file_name = file_names[batch_id]

                gt_coco += [{
                    "id":
                    str(hash(f"{file_name}_{batch_id}")),
                    "image_id":
                    file_name,
                    "category_id":
                    1,
                    "bbox": [
                        x_min.item() * image_width,
                        y_min.item() * image_height,
                        (x_max - x_min).item() * image_width,
                        (y_max - y_min).item() * image_height,
                    ],
                }]

        return OrderedDict({"predictions": predictions_coco, "gt": gt_coco})
    def detect_faces(self,
                     img_raw,
                     confidence_threshold=0.9,
                     top_k=5000,
                     nms_threshold=0.4,
                     keep_top_k=750,
                     resize=1):
        img = np.float32(img_raw)
        im_height, im_width = img.shape[:2]
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        # tic = time.time()
        with torch.no_grad():
            loc, conf, landms = self.model(img)  # forward pass
            # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg_mnet, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg_mnet['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg_mnet['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]
        # reshape (N, 10) -> (N, 5, 2), transpose to group x's and y's, then flatten back to (N, 10)
        landms = landms.reshape((-1, 5, 2))
        landms = landms.transpose((0, 2, 1))
        landms = landms.reshape(-1, 10)

        return dets, landms
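A hedged sketch of consuming detect_faces: the detector instance and the image path are assumptions. Grounded in the code above is that each row of dets is [x_min, y_min, x_max, y_max, score] and that, after the final transpose, each row of landms stores the five x coordinates followed by the five y coordinates.

import cv2

# Hypothetical usage: `detector` is an instance of the class defining detect_faces above.
img_raw = cv2.imread("group.jpg")  # BGR image straight from OpenCV
dets, landms = detector.detect_faces(img_raw, confidence_threshold=0.9)

for det, landm in zip(dets, landms):
    x_min, y_min, x_max, y_max, score = det
    xs, ys = landm[:5], landm[5:]  # all x coordinates first, then all y coordinates (see transpose above)
    for x, y in zip(xs, ys):
        cv2.circle(img_raw, (int(x), int(y)), radius=2, color=(0, 255, 0), thickness=-1)
    cv2.rectangle(img_raw, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 2)

cv2.imwrite("group_detections.jpg", img_raw)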