def filter_results(self, scores, boxes):
    """Apply the score threshold and per-class NMS to raw detections.

    Class index 0 is never visited (the loop starts at 1); presumably it is
    the background class -- confirm against the model's label map.

    Args:
        scores: 2-D tensor indexed as ``scores[:, class_index]``; its second
            dimension is the number of classes. Batched input must be
            un-batched by the caller first.
        boxes: 2-D tensor with one row per row of ``scores``. The returned
            slices (``[:, :4]`` and ``[:, 4]``) assume four coordinates per
            box.

    Returns:
        Tuple ``(boxes, labels, probs)`` of CPU tensors: the kept boxes, an
        int64 class index per kept box, and the score of each kept box.
        When no detection survives (or there is no foreground class), the
        three tensors are empty with shapes ``(0, 4)``, ``(0,)`` and ``(0,)``.
    """
    # In order to avoid custom C++ extensions we use an NMS implementation
    # written purely in Python. That implementation is faster on the CPU,
    # which is why this part runs on the CPU.
    cpu_device = torch.device("cpu")
    boxes = boxes.to(cpu_device)
    scores = scores.to(cpu_device)

    selected_box_probs = []
    labels = []
    for class_index in range(1, scores.size(1)):
        probs = scores[:, class_index]
        mask = probs > self.score_threshold
        probs = probs[mask]
        if probs.size(0) == 0:
            # Nothing cleared the threshold for this class; do not hand an
            # empty tensor to NMS.
            continue
        subset_boxes = boxes[mask, :]
        box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
        box_probs = nms(box_probs, self.nms_threshold)
        selected_box_probs.append(box_probs)
        labels.append(
            torch.full((box_probs.size(0), ), class_index, dtype=torch.int64))

    if not selected_box_probs:
        # torch.cat() raises on an empty list, so build the empty result
        # explicitly.
        return (boxes.new_zeros((0, 4)),
                torch.zeros((0, ), dtype=torch.int64),
                boxes.new_zeros((0, )))

    selected_box_probs = torch.cat(selected_box_probs)
    labels = torch.cat(labels)
    return selected_box_probs[:, :4], labels, selected_box_probs[:, 4]
def yolact_postprocess(
    outputs, scale_x, scale_y, frame_height, frame_width,
    input_height, input_width, conf_threshold, nms_threshold=0.5
):
    """Turn raw YOLACT outputs into score-sorted detections.

    For every class except index 0 (presumably background -- confirm), the
    candidates whose confidence exceeds ``conf_threshold`` are sanitized
    against the frame size and passed through NMS. The survivors of all
    classes are merged, sorted by descending score and handed to
    ``yolact_segm_postprocess``.

    Args:
        outputs: Mapping with the keys ``'boxes'``, ``'conf'``, ``'mask'``
            and ``'proto'``; only element ``[0]`` of each entry is used.
        scale_x, scale_y: Scale factors, used here only to derive the
            ``shift_x`` / ``shift_y`` values forwarded to the segmentation
            post-processing.
        frame_height, frame_width: Size of the original frame.
        input_height, input_width: Size of the network input.
        conf_threshold: Candidates must score strictly above this value.
        nms_threshold: Threshold forwarded to ``nms`` (default ``0.5``, the
            previously hard-coded value).

    Returns:
        Tuple ``(scores, classes, boxes, masks)``. All four are empty arrays
        when no candidate exceeds ``conf_threshold``.
    """
    boxes = outputs['boxes'][0]
    # After the transpose, axis 0 of ``conf`` enumerates the classes.
    conf = np.transpose(outputs['conf'][0])
    masks = outputs['mask'][0]
    proto = outputs['proto'][0]
    num_classes = conf.shape[0]

    shift_x = (input_width - (frame_width * scale_x)) / frame_width
    shift_y = (input_height - (frame_height * scale_y)) / frame_height

    idx_lst, cls_lst, scr_lst = [], [], []
    for cls in range(1, num_classes):
        cls_scores = conf[cls, :]
        # Indices of the candidates that clear the confidence threshold.
        idx = np.flatnonzero(cls_scores > conf_threshold)
        if idx.size == 0:
            continue
        cls_scores = cls_scores[idx]

        x1, x2 = sanitize_coordinates(boxes[idx, 0], boxes[idx, 2], frame_width)
        y1, y2 = sanitize_coordinates(boxes[idx, 1], boxes[idx, 3], frame_height)
        keep = nms(x1, y1, x2, y2, cls_scores, nms_threshold)

        idx_lst.append(idx[keep])
        cls_lst.append(np.full(len(keep), cls))
        scr_lst.append(cls_scores[keep])

    if not idx_lst:
        return np.array([]), np.array([]), np.array([]), np.array([])

    idx = np.concatenate(idx_lst, axis=0)
    classes = np.concatenate(cls_lst, axis=0)
    scores = np.concatenate(scr_lst, axis=0)

    # Order every per-detection array by descending score.
    order = np.argsort(scores, axis=0)[::-1]
    scores = scores[order]
    idx = idx[order]
    classes = classes[order]

    boxes = boxes[idx]
    masks = masks[idx]
    if np.size(boxes) > 0:
        boxes, scores, classes, masks = yolact_segm_postprocess(
            boxes, masks, scores, classes, proto, frame_width, frame_height,
            shift_x=shift_x, shift_y=shift_y
        )
    return scores, classes, boxes, masks
def test_detect():
    """Time YOLOv5 inference + NMS on one image and display the detections.

    Loads the COCO weights, runs the model and ``nms`` 200 times on the first
    image of ``./images`` and prints the mean time per run, leaving the first
    5 runs out of the average. The detections of the last run are drawn on
    the original image and shown in a window until a key is pressed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = YOLOv5(80).to(device)
    print(load_params(model, "weights/yolov5s_coco.pth", strict=False))
    dataset = LoadImages("./images", 640, 32)
    model.float().fuse().eval().requires_grad_(False)

    # Half precision is used on every device except the CPU
    # (set ``half = False`` here to force full precision).
    half = device.type != 'cpu'
    if half:
        model.half()

    # Pre-processing
    x, img0, path = dataset[0]
    x = x.to(device)
    x = x.half() if half else x.float()
    x /= 255
    if x.dim() == 3:
        # Add the batch dimension.
        x = x[None]
    model_info(model, x.shape[-2:])

    num_runs, num_skipped = 200, 5
    sum_t = 0.
    for run in range(num_runs):
        start = time.time()
        target = model(x)[0]
        # NOTE(review): 0.2 / 0.45 look like score and IoU thresholds -- confirm.
        target = nms(target, 0.2, 0.45)
        if run >= num_skipped:
            # The first few runs are not counted in the average.
            sum_t += time.time() - start

    # Post-processing
    target = target[0]
    convert_boxes(target, x.shape[-2:], img0.shape[:2])
    boxes = target[:, :4].cpu().numpy()
    scores = target[:, 4].cpu().numpy()
    labels = target[:, 5].cpu().numpy()

    # Drawing
    img = img0.copy()
    draw_target_in_image(img, boxes, labels, scores, "coco")
    img = resize_max(img, 720, 1080)
    cv.imshow("1", img)
    cv.waitKey(0)
    print(sum_t / (num_runs - num_skipped))
def test_test():
    """Run YOLOv5 over the VOC 07+12 test split and show predictions vs. labels.

    Builds the dataset from hard-coded local paths, loads the VOC weights and
    walks the dataset in reverse order. For each sample the loss term at
    index 1 of the loss function's result is accumulated, and two windows are
    shown ("pred" with the NMS-filtered predictions, "target" with the
    ground-truth boxes) until a key is pressed.
    """
    img_path_list, target_list = make_dataset(
        r"D:\datasets\VOCdevkit\VOC0712\JPEGImages",
        r"D:\datasets\VOCdevkit\VOC0712\Annotations",
        r"D:\datasets\VOCdevkit\VOC0712\pkl\voc_0712_test.pkl",
        r"D:\datasets\VOCdevkit\VOC0712\ImageSets\Main\test.txt",
        "voc", False)
    dataset = LoadImagesAndLabels(img_path_list, target_list, 640, 32, 0.5,
                                  False, {"batch_size": 1})

    # Each sample is (x, target_out, path):
    # x: Tensor[C, H, W], target_out: Tensor[X, 6], path: str. [idx, cls, *xywh]

    def test1():
        """Show every dataset image with its ground-truth boxes drawn on it."""
        for x, target, img_path in dataset:
            print(x.shape, target, img_path)
            x = x.numpy()
            # to (H, W, C), RGB to BGR
            x = np.ascontiguousarray(x.transpose(1, 2, 0)[:, :, ::-1])
            h, w = x.shape[:2]
            boxes = cxcywh2ltrb(target[:, 2:].numpy())
            labels = target[:, 1].numpy()
            boxes[:, 0::2] *= w  # left / right
            boxes[:, 1::2] *= h  # top / bottom
            draw_target_in_image(x, boxes, labels, None, "voc")
            cv.imshow("1", x)
            cv.waitKey(0)

    # Uncomment to preview the dataset before running the model:
    # test1()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = YOLOv5(20).to(device)
    print(load_params(model, "weights/yolov5s_voc.pth", strict=False))
    model.float().fuse().eval().requires_grad_(False)
    # Half precision is used on every device except the CPU.
    half = device.type != 'cpu'
    if half:
        model.half()

    hyp = {
        "obj_pw": 0.911,
        "cls_pw": 0.631,
        "anchor_t": 2.91,
        "box_lw": 0.0296,
        "obj_lw": 0.301,
        "cls_lw": 0.06075,
    }
    loss_func = Loss(model, hyp)
    loss = torch.zeros((4, ), device=device)

    for x, target0, img_path in reversed(dataset):
        # Keep a displayable copy of the input before it is normalized.
        img0 = x.numpy()
        img0 = img0.transpose(1, 2, 0)[:, :, ::-1]  # to (H, W, C), RGB to BGR
        img0 = np.ascontiguousarray(img0)
        img = img0.copy()

        # Pre-processing
        x = x.to(device)
        target0 = target0.to(device)
        x = x.half() if half else x.float()
        x /= 255
        if x.dim() == 3:
            x = x[None]

        # Prediction
        target, loss_target = model(x)
        loss += loss_func([loss_t.float() for loss_t in loss_target], target0)[1]
        target = nms(target, 0.001, 0.6)

        # Post-processing
        # 1: predictions
        target = target[0]
        boxes = target[:, :4].cpu().numpy()
        scores = target[:, 4].cpu().numpy()
        labels = target[:, 5].cpu().numpy()
        draw_target_in_image(img, boxes, labels, scores, "voc")
        cv.imshow("pred", img)

        # 2: ground truth
        img2 = img0.copy()
        h, w = img2.shape[:2]
        boxes = cxcywh2ltrb(target0[:, 2:].cpu().numpy())
        labels = target0[:, 1].cpu().numpy()
        boxes[:, 0::2] *= w  # left / right
        boxes[:, 1::2] *= h  # top / bottom
        draw_target_in_image(img2, boxes, labels, None, "voc")
        cv.imshow("target", img2)
        cv.waitKey(0)