def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    loc, conf, land = self.model(x)
    conf = F.softmax(conf, dim=-1)

    boxes = decode(loc.data[0], self.prior_box, self.variance)
    boxes *= self.scale_bboxes
    scores = conf[0][:, 1]

    landmarks = decode_landm(land.data[0], self.prior_box, self.variance)
    landmarks *= self.scale_landmarks

    # ignore low scores
    valid_index = torch.where(scores > self.confidence_threshold)[0]
    boxes = boxes[valid_index]
    landmarks = landmarks[valid_index]
    scores = scores[valid_index]

    # do NMS
    keep = nms(boxes, scores, self.nms_threshold)
    boxes = boxes[keep, :]
    landmarks = landmarks[keep]
    scores = scores[keep]

    return boxes, scores, landmarks
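# A minimal, self-contained sketch of the threshold-then-NMS pattern used in
# `forward` above. The box/score values are made up for illustration; it
# assumes torchvision's `nms`, which is what these snippets appear to use.
import torch
from torchvision.ops import nms as tv_nms

demo_boxes = torch.tensor(
    [[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0], [50.0, 50.0, 60.0, 60.0]]
)
demo_scores = torch.tensor([0.9, 0.8, 0.3])

demo_valid = torch.where(demo_scores > 0.5)[0]  # drop the 0.3 detection
demo_keep = tv_nms(demo_boxes[demo_valid], demo_scores[demo_valid], iou_threshold=0.4)
# demo_keep == tensor([0]): the near-duplicate second box (IoU ~0.68) is suppressed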
def predict_jsons(
    self, image: np.ndarray, confidence_threshold: float = 0.7, nms_threshold: float = 0.4
) -> List[Dict[str, Union[List, float]]]:
    with torch.no_grad():
        original_height, original_width = image.shape[:2]

        scale_landmarks = torch.from_numpy(np.tile([self.max_size, self.max_size], 5)).to(self.device)
        scale_bboxes = torch.from_numpy(np.tile([self.max_size, self.max_size], 2)).to(self.device)

        transformed_image = self.transform(image=image)["image"]

        padded = pad_to_size(target_size=(self.max_size, self.max_size), image=transformed_image)
        pads = padded["pads"]

        torched_image = tensor_from_rgb_image(padded["image"]).to(self.device)

        loc, conf, land = self.model(torched_image.unsqueeze(0))
        conf = F.softmax(conf, dim=-1)

        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[0], self.prior_box, self.variance)
        boxes *= scale_bboxes
        scores = conf[0][:, 1]

        landmarks = decode_landm(land.data[0], self.prior_box, self.variance)
        landmarks *= scale_landmarks

        # ignore low scores
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # Sort from high to low
        order = scores.argsort(descending=True)
        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # do NMS
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :].int()

        if boxes.shape[0] == 0:
            return [{"bbox": [], "score": -1, "landmarks": []}]

        landmarks = landmarks[keep]

        scores = scores[keep].cpu().numpy().astype(np.float64)
        boxes = boxes.cpu().numpy()
        landmarks = landmarks.cpu().numpy()
        landmarks = landmarks.reshape([-1, 2])

        unpadded = unpad_from_size(pads, bboxes=boxes, keypoints=landmarks)

        resize_coeff = max(original_height, original_width) / self.max_size

        boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
        landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

        for box_id, bbox in enumerate(boxes):
            x_min, y_min, x_max, y_max = bbox

            x_min = np.clip(x_min, 0, original_width - 1)
            x_max = np.clip(x_max, x_min + 1, original_width - 1)
            if x_min >= x_max:
                continue

            y_min = np.clip(y_min, 0, original_height - 1)
            y_max = np.clip(y_max, y_min + 1, original_height - 1)
            if y_min >= y_max:
                continue

            annotations += [
                {
                    "bbox": bbox.tolist(),
                    "score": scores[box_id],
                    "landmarks": landmarks[box_id].reshape(-1, 2).tolist(),
                }
            ]

        return annotations
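# `predict_jsons` above letterboxes the transformed image into a
# (max_size, max_size) square, then shifts detections back with
# `unpad_from_size`. The real helpers live elsewhere in the repo; the sketch
# below only illustrates the round trip, and the (x_min, y_min, x_max, y_max)
# pad ordering is an assumption, not the library's documented contract.
import numpy as np


def pad_to_size_sketch(target_size, image):
    # Symmetrically zero-pad an (H, W, C) image up to target_size (H, W).
    target_height, target_width = target_size
    height, width = image.shape[:2]
    y_pad, x_pad = target_height - height, target_width - width
    top, left = y_pad // 2, x_pad // 2
    padded = np.pad(image, ((top, y_pad - top), (left, x_pad - left), (0, 0)))
    return {"image": padded, "pads": (left, top, x_pad - left, y_pad - top)}


def unpad_from_size_sketch(pads, bboxes):
    # Undo the padding shift for (x_min, y_min, x_max, y_max) boxes.
    left, top = pads[0], pads[1]
    return {"bboxes": bboxes - np.array([left, top, left, top])}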
def predict_jsons(
    self, image: np.ndarray, confidence_threshold: float = 0.7, nms_threshold: float = 0.4
) -> List[Dict[str, Union[List, float]]]:
    with torch.no_grad():
        original_height, original_width = image.shape[:2]

        transformed_image = self.transform(image=image)["image"]
        transformed_height, transformed_width = transformed_image.shape[:2]
        transformed_size = (transformed_width, transformed_height)

        scale_landmarks = torch.from_numpy(np.tile(transformed_size, 5)).to(self.device)
        scale_bboxes = torch.from_numpy(np.tile(transformed_size, 2)).to(self.device)

        prior_box = priorbox(
            min_sizes=[[16, 32], [64, 128], [256, 512]],
            steps=[8, 16, 32],
            clip=False,
            image_size=transformed_image.shape[:2],
        ).to(self.device)

        torched_image = tensor_from_rgb_image(transformed_image).to(self.device)

        loc, conf, land = self.model(torched_image.unsqueeze(0))  # pylint: disable=E1102
        conf = F.softmax(conf, dim=-1)

        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[0], prior_box, self.variance)
        boxes *= scale_bboxes
        scores = conf[0][:, 1]

        landmarks = decode_landm(land.data[0], prior_box, self.variance)
        landmarks *= scale_landmarks

        # ignore low scores
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # do NMS
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :]

        if boxes.shape[0] == 0:
            return [{"bbox": [], "score": -1, "landmarks": []}]

        landmarks = landmarks[keep]

        scores = scores[keep].cpu().numpy().astype(float)
        boxes_np = boxes.cpu().numpy()
        landmarks_np = landmarks.cpu().numpy()

        resize_coeff = original_height / transformed_height

        boxes_np *= resize_coeff
        landmarks_np = landmarks_np.reshape(-1, 10) * resize_coeff

        for box_id, bbox in enumerate(boxes_np):
            x_min, y_min, x_max, y_max = bbox

            x_min = np.clip(x_min, 0, original_width - 1)
            x_max = np.clip(x_max, x_min + 1, original_width - 1)
            if x_min >= x_max:
                continue

            y_min = np.clip(y_min, 0, original_height - 1)
            y_max = np.clip(y_max, y_min + 1, original_height - 1)
            if y_min >= y_max:
                continue

            annotations += [
                {
                    "bbox": np.round(bbox.astype(float), ROUNDING_DIGITS).tolist(),
                    "score": np.round(scores, ROUNDING_DIGITS)[box_id],
                    "landmarks": np.round(landmarks_np[box_id].astype(float), ROUNDING_DIGITS)
                    .reshape(-1, 2)
                    .tolist(),
                }
            ]

        return annotations
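# For reference: the `decode` called throughout these functions follows the
# standard SSD-style box decoding. This sketch matches the widely used
# PyTorch RetinaFace/SSD implementation; the repo's own `decode` is
# authoritative.
import torch


def decode_sketch(loc: torch.Tensor, priors: torch.Tensor, variances) -> torch.Tensor:
    # priors are (cx, cy, w, h) in relative coordinates; loc holds the
    # predicted offsets. Recover centers/sizes, then convert to corner form.
    boxes = torch.cat(
        (
            priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
            priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1]),
        ),
        dim=1,
    )
    boxes[:, :2] -= boxes[:, 2:] / 2  # (cx, cy) -> (x_min, y_min)
    boxes[:, 2:] += boxes[:, :2]      # (w, h) -> (x_max, y_max)
    return boxes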
def process_predictions(
    prediction, original_shapes, input_shape, pads, confidence_threshold, nms_threshold, prior_box, variance
):
    loc, conf, land = prediction
    conf = F.softmax(conf, dim=-1)

    result: List[List[Dict[str, Union[List, float]]]] = []

    batch_size, _, image_height, image_width = input_shape

    scale1 = torch.from_numpy(np.tile([image_width, image_height], 5)).to(loc.device)
    scale = torch.from_numpy(np.tile([image_width, image_height], 2)).to(loc.device)

    for batch_id in range(batch_size):
        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[batch_id], prior_box.to(loc.device), variance)
        boxes *= scale
        scores = conf[batch_id][:, 1]

        landmarks = decode_landm(land.data[batch_id], prior_box.to(land.device), variance)
        landmarks *= scale1

        # ignore low scores
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # sort from high to low
        order = scores.argsort(descending=True)
        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # do NMS
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :].int()

        if boxes.shape[0] == 0:
            result += [[{"bbox": [], "score": -1, "landmarks": []}]]
            continue

        landmarks = landmarks[keep]

        scores = scores[keep].cpu().numpy().astype(np.float64)
        boxes = boxes.cpu().numpy()
        landmarks = landmarks.cpu().numpy().reshape([-1, 2])

        if pads is None:
            pads_numpy = np.array([0, 0, 0, 0])
        else:
            pads_numpy = pads[batch_id]

        unpadded = unpad_from_size(pads_numpy, bboxes=boxes, keypoints=landmarks)

        resize_coeff = max(original_shapes[batch_id]) / max(image_height, image_width)

        boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
        landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

        for crop_id, bbox in enumerate(boxes):
            annotations += [
                {
                    "bbox": bbox.tolist(),
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].reshape(-1, 2).tolist(),
                }
            ]

        result += [annotations]

    return result
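# Hedged usage sketch for `process_predictions`. Every name and shape below is
# an assumption for illustration (a batch of two padded 640x640 images, with
# `model`, `prior_box`, and `pads` produced elsewhere in the pipeline):
#
# prediction = model(batch)                       # (loc, conf, land) triple
# result = process_predictions(
#     prediction,
#     original_shapes=[(720, 1280), (480, 640)],  # per-image (height, width)
#     input_shape=(2, 3, 640, 640),               # NCHW batch fed to the model
#     pads=pads,                                  # per-image pads, or None
#     confidence_threshold=0.7,
#     nms_threshold=0.4,
#     prior_box=prior_box,
#     variance=[0.1, 0.2],
# )
# # result[batch_id] -> list of {"bbox", "score", "landmarks"} dicts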
def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> None:
    torched_images = batch["torched_image"]
    resizes = batch["resize"]
    image_paths = batch["image_path"]
    raw_images = batch["raw_image"]

    loc, conf, land = self.model(torched_images)
    conf = F.softmax(conf, dim=-1)

    batch_size = torched_images.shape[0]
    image_height, image_width = torched_images.shape[2:]

    scale1 = torch.from_numpy(np.tile([image_width, image_height], 5)).to(self.device)
    scale = torch.from_numpy(np.tile([image_width, image_height], 2)).to(self.device)

    priors = object_from_dict(self.hparams["prior_box"], image_size=(image_height, image_width)).to(loc.device)

    for batch_id in range(batch_size):
        image_path = image_paths[batch_id]
        file_id = Path(str(image_path)).stem
        raw_image = raw_images[batch_id]
        resize = resizes[batch_id].float()

        # collect labels for this image only
        labels: List[Dict[str, Any]] = []

        boxes = decode(loc.data[batch_id], priors, self.hparams["test_parameters"]["variance"])
        boxes *= scale / resize
        scores = conf[batch_id][:, 1]

        landmarks = decode_landm(land.data[batch_id], priors, self.hparams["test_parameters"]["variance"])
        landmarks *= scale1 / resize

        # ignore low scores
        valid_index = torch.where(scores > self.hparams["confidence_threshold"])[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # sort from high to low
        order = scores.argsort(descending=True)
        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # do NMS
        keep = nms(boxes, scores, self.hparams["nms_threshold"])
        boxes = boxes[keep, :].int()
        if boxes.shape[0] == 0:
            continue

        landmarks = landmarks[keep].int()
        scores = scores[keep].cpu().numpy().astype(np.float64)

        boxes = boxes[: self.hparams["keep_top_k"]]
        landmarks = landmarks[: self.hparams["keep_top_k"]]
        scores = scores[: self.hparams["keep_top_k"]]

        if self.hparams["visualize"]:
            vis_image = raw_image.cpu().numpy().copy()

            for crop_id, bbox in enumerate(boxes):
                landms = landmarks[crop_id].cpu().numpy().reshape([5, 2])

                colors = [(255, 0, 0), (128, 255, 0), (255, 178, 102), (102, 128, 255), (0, 255, 255)]

                for i, (x, y) in enumerate(landms):
                    vis_image = cv2.circle(vis_image, (x, y), radius=3, color=colors[i], thickness=3)

                x_min, y_min, x_max, y_max = bbox.cpu().numpy()

                x_min = np.clip(x_min, 0, x_max - 1)
                y_min = np.clip(y_min, 0, y_max - 1)

                vis_image = cv2.rectangle(
                    vis_image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2
                )

            cv2.imwrite(str(self.output_vis_path / f"{file_id}.jpg"), cv2.cvtColor(vis_image, cv2.COLOR_BGR2RGB))

        for crop_id, bbox in enumerate(boxes):
            bbox = bbox.cpu().numpy()

            labels += [
                {
                    "crop_id": crop_id,
                    "bbox": bbox.tolist(),
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].tolist(),
                }
            ]

        result = {"file_path": image_path, "file_id": file_id, "bboxes": labels}

        with open(self.output_label_path / f"{file_id}.json", "w") as f:
            json.dump(result, f, indent=2)
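# Shape of the per-image JSON that `test_step` writes, for reference
# (all field values below are hypothetical):
#
# {
#   "file_path": "data/images/0001.jpg",
#   "file_id": "0001",
#   "bboxes": [
#     {
#       "crop_id": 0,
#       "bbox": [55, 74, 177, 230],
#       "score": 0.998,
#       "landmarks": [85, 135, 141, 132, 112, 167, 91, 194, 139, 191]
#     }
#   ]
# }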
def detect_faces(self, img_raw, confidence_threshold=0.9, top_k=5000, nms_threshold=0.4, keep_top_k=750, resize=1):
    img = np.float32(img_raw)

    im_height, im_width = img.shape[:2]
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # subtract the BGR channel means and convert HWC -> NCHW
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    with torch.no_grad():
        loc, conf, landms = self.model(img)  # forward pass

    priorbox = PriorBox(cfg_mnet, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg_mnet["variance"])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg_mnet["variance"])
    scale1 = torch.Tensor(
        [
            img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
            img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2],
        ]
    )
    scale1 = scale1.to(self.device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]

    # reorder landmarks from (x1, y1, ..., x5, y5) to (x1, ..., x5, y1, ..., y5)
    landms = landms.reshape((-1, 5, 2))
    landms = landms.transpose((0, 2, 1))
    landms = landms.reshape(-1, 10)

    return dets, landms
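# For completeness: `py_cpu_nms` used above is the classic pure-NumPy greedy
# NMS (the Fast R-CNN baseline). Reproduced here as a sketch in case the
# helper is not vendored alongside this snippet.
import numpy as np


def py_cpu_nms_sketch(dets, thresh):
    # dets is (N, 5): x1, y1, x2, y2, score. Greedily keep the highest-scoring
    # box and drop every remaining box whose IoU with it exceeds `thresh`.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        # intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep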