Example 1
def filter_results(self, scores, boxes):
    # In order to avoid custom C++ extensions we use an NMS
    # implementation written purely in Python. This implementation
    # is faster on the CPU, which is why we run this part on the CPU.
    cpu_device = torch.device("cpu")
    # boxes = boxes[0]
    # scores = scores[0]
    boxes = boxes.to(cpu_device)
    scores = scores.to(cpu_device)
    selected_box_probs = []
    labels = []
    # Class 0 is the background, so start from 1.
    for class_index in range(1, scores.size(1)):
        probs = scores[:, class_index]
        # Keep only detections above the score threshold for this class.
        mask = probs > self.score_threshold
        probs = probs[mask]
        subset_boxes = boxes[mask, :]
        # Stack boxes and scores into an (N, 5) tensor and run per-class NMS.
        box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
        box_probs = nms(box_probs, self.nms_threshold)
        selected_box_probs.append(box_probs)
        labels.append(
            torch.full((box_probs.size(0), ),
                       class_index,
                       dtype=torch.int64))
    selected_box_probs = torch.cat(selected_box_probs)
    labels = torch.cat(labels)
    # Return boxes, labels, and scores as separate tensors.
    return selected_box_probs[:, :4], labels, selected_box_probs[:, 4]
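Example 1 relies on an nms helper that takes an (N, 5) tensor of [x1, y1, x2, y2, score] rows plus an IoU threshold and returns the rows that survive suppression. The actual implementation is not shown above; the following is a minimal pure-PyTorch sketch of such a greedy hard NMS, where the name hard_nms and all details are assumptions rather than the original code.

import torch

def hard_nms(box_probs, iou_threshold):
    # box_probs: (N, 5) tensor of [x1, y1, x2, y2, score].
    # Returns the surviving rows, highest score first.
    if box_probs.numel() == 0:
        return box_probs
    boxes, scores = box_probs[:, :4], box_probs[:, 4]
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        if order.numel() == 1:
            break
        rest = order[1:]
        # Intersection of the winning box with every remaining box.
        lt = torch.max(boxes[i, :2], boxes[rest, :2])
        rb = torch.min(boxes[i, 2:], boxes[rest, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[:, 0] * wh[:, 1]
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_rest - inter)
        # Drop everything that overlaps the winner above the threshold.
        order = rest[iou <= iou_threshold]
    return box_probs[keep]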
Example 2
def yolact_postprocess(
        outputs, scale_x, scale_y, frame_height, frame_width, input_height, input_width, conf_threshold
):
    boxes = outputs['boxes'][0]
    conf = np.transpose(outputs['conf'][0])
    masks = outputs['mask'][0]
    proto = outputs['proto'][0]
    num_classes = conf.shape[0]
    idx_lst, cls_lst, scr_lst = [], [], []
    # Padding added when resizing the frame to the network input,
    # expressed as a fraction of the original frame size.
    shift_x = (input_width - (frame_width * scale_x)) / frame_width
    shift_y = (input_height - (frame_height * scale_y)) / frame_height

    for cls in range(1, num_classes):  # class 0 is the background
        cls_scores = conf[cls, :]
        idx = np.arange(cls_scores.shape[0])
        conf_mask = cls_scores > conf_threshold

        cls_scores = cls_scores[conf_mask]
        idx = idx[conf_mask]

        if cls_scores.shape[0] == 0:
            continue
        x1, x2 = sanitize_coordinates(boxes[idx, 0], boxes[idx, 2], frame_width)
        y1, y2 = sanitize_coordinates(boxes[idx, 1], boxes[idx, 3], frame_height)
        keep = nms(x1, y1, x2, y2, cls_scores, 0.5)

        idx_lst.append(idx[keep])
        cls_lst.append(np.full(len(keep), cls))
        scr_lst.append(cls_scores[keep])

    if not idx_lst:
        return np.array([]), np.array([]), np.array([]), np.array([])
    idx = np.concatenate(idx_lst, axis=0)
    classes = np.concatenate(cls_lst, axis=0)
    scores = np.concatenate(scr_lst, axis=0)

    idx2 = np.argsort(scores, axis=0)[::-1]
    scores = scores[idx2]

    idx = idx[idx2]
    classes = classes[idx2]

    boxes = boxes[idx]
    masks = masks[idx]
    if np.size(boxes) > 0:
        boxes, scores, classes, masks = yolact_segm_postprocess(
            boxes, masks, scores, classes, proto, frame_width, frame_height, shift_x=shift_x, shift_y=shift_y
        )
    return scores, classes, boxes, masks
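yolact_postprocess calls a sanitize_coordinates helper that is not shown here. Below is a plausible minimal NumPy sketch, assuming the raw boxes are normalized to [0, 1] and the helper returns ordered, clamped pixel coordinates; it mirrors YOLACT's helper of the same name, but the details are assumptions.

import numpy as np

def sanitize_coordinates(x_a, x_b, img_size, padding=0):
    # Scale relative coordinates to pixels, order them so x1 <= x2,
    # and clamp both to [0, img_size].
    x1 = np.minimum(x_a, x_b) * img_size
    x2 = np.maximum(x_a, x_b) * img_size
    x1 = np.clip(x1 - padding, 0, img_size)
    x2 = np.clip(x2 + padding, 0, img_size)
    return x1, x2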
def test_detect():
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = YOLOv5(80).to(device)
    print(load_params(model, "weights/yolov5s_coco.pth", strict=False))
    dataset = LoadImages("./images", 640, 32)
    model.float().fuse().eval().requires_grad_(False)
    half = (device.type != 'cpu')
    # half = False
    if half:
        model.half()
    sum_t = 0.
    # preprocessing
    x, img0, path = dataset[0]
    x = x.to(device)
    x = x.half() if half else x.float()
    x /= 255
    x = x[None] if x.dim() == 3 else x
    model_info(model, x.shape[-2:])
    for i in range(200):
        t = time.time()
        target = model(x)[0]
        target = nms(target, 0.2, 0.45)
        if i >= 5:  # skip the first 5 iterations as warm-up
            sum_t += time.time() - t
        # post-processing
        target = target[0]
        convert_boxes(target, x.shape[-2:], img0.shape[:2])
        boxes = target[:, :4].cpu().numpy()
        scores = target[:, 4].cpu().numpy()
        labels = target[:, 5].cpu().numpy()
        img = img0.copy()
        # draw detections
        draw_target_in_image(img, boxes, labels, scores, "coco")
        img = resize_max(img, 720, 1080)
        cv.imshow("1", img)
        cv.waitKey(0)
    print(sum_t / (200 - 5))  # average time per iteration, excluding the 5 warm-up runs
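test_detect rescales predictions with convert_boxes(target, x.shape[-2:], img0.shape[:2]), i.e. it maps xyxy boxes from the letterboxed network input back to the original image and modifies the tensor in place. A minimal sketch of that mapping follows, assuming centred letterbox padding; the real convert_boxes is not shown, so treat this only as an illustration.

import torch

def convert_boxes_sketch(target, input_shape, img0_shape):
    # target: (N, 6) tensor whose first four columns are xyxy boxes in
    # network-input coordinates; input_shape / img0_shape are (height, width).
    gain = min(input_shape[0] / img0_shape[0], input_shape[1] / img0_shape[1])
    pad_w = (input_shape[1] - img0_shape[1] * gain) / 2
    pad_h = (input_shape[0] - img0_shape[0] * gain) / 2
    target[:, [0, 2]] = ((target[:, [0, 2]] - pad_w) / gain).clamp(0, img0_shape[1])
    target[:, [1, 3]] = ((target[:, [1, 3]] - pad_h) / gain).clamp(0, img0_shape[0])
    return target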
def test_test():
    img_path_list, target_list = \
        make_dataset(r"D:\datasets\VOCdevkit\VOC0712\JPEGImages",
                     r"D:\datasets\VOCdevkit\VOC0712\Annotations",
                     r"D:\datasets\VOCdevkit\VOC0712\pkl\voc_0712_test.pkl",
                     r"D:\datasets\VOCdevkit\VOC0712\ImageSets\Main\test.txt", "voc", False)

    dataset = LoadImagesAndLabels(img_path_list, target_list, 640, 32, 0.5,
                                  False, {"batch_size": 1})

    # x: Tensor[C, H, W], target: Tensor[X, 6] with rows [idx, cls, *xywh], img_path: str
    # test show
    def test1():
        for x, target, img_path in dataset:
            print(x.shape, target, img_path)
            x = x.numpy()
            x = x.transpose(1, 2, 0)[:, :, ::-1]  # to (H, W, C), RGB to BGR
            x = np.ascontiguousarray(x)
            h, w = x.shape[:2]
            boxes = target[:, 2:].numpy()
            labels = target[:, 1].numpy()
            boxes = cxcywh2ltrb(boxes)
            boxes[:, 0::2] *= w  # lr
            boxes[:, 1::2] *= h  # tb
            draw_target_in_image(x, boxes, labels, None, "voc")
            cv.imshow("1", x)
            cv.waitKey(0)

    # test1()
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = YOLOv5(20).to(device)
    print(load_params(model, "weights/yolov5s_voc.pth", strict=False))
    model.float().fuse().eval().requires_grad_(False)
    half = (device.type != 'cpu')
    if half:
        model.half()
    hyp = {
        "obj_pw": 0.911,
        "cls_pw": 0.631,
        "anchor_t": 2.91,
        "box_lw": 0.0296,
        "obj_lw": 0.301,
        "cls_lw": 0.06075
    }
    loss_func = Loss(model, hyp)
    loss = torch.zeros((4, ), device=device)
    for x, target0, img_path in reversed(dataset):
        img0 = x.numpy()
        img0 = img0.transpose(1, 2, 0)[:, :, ::-1]  # to (H, W, C), RGB to BGR
        img0 = np.ascontiguousarray(img0)
        img = img0.copy()
        # preprocessing
        x, target0 = x.to(device), target0.to(device)
        x = x.half() if half else x.float()
        x /= 255
        if x.dim() == 3:
            x = x[None]
        # prediction
        target, loss_target = model(x)
        loss += loss_func([loss_t.float() for loss_t in loss_target],
                          target0)[1]
        target = nms(target, 0.001, 0.6)

        # post-processing
        # 1. draw predictions
        target = target[0]
        boxes = target[:, :4].cpu().numpy()
        scores = target[:, 4].cpu().numpy()
        labels = target[:, 5].cpu().numpy()
        draw_target_in_image(img, boxes, labels, scores, "voc")
        cv.imshow("pred", img)
        # 2. draw ground-truth targets
        img2 = img0.copy()
        h, w = img2.shape[:2]
        boxes = target0[:, 2:].cpu().numpy()
        labels = target0[:, 1].cpu().numpy()
        boxes = cxcywh2ltrb(boxes)
        boxes[:, 0::2] *= w  # lr
        boxes[:, 1::2] *= h  # tb
        draw_target_in_image(img2, boxes, labels, None, "voc")
        cv.imshow("target", img2)
        cv.waitKey(0)
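Both test functions convert normalized [cx, cy, w, h] labels to corner format with cxcywh2ltrb before scaling them by the image width and height. The helper itself is not shown; below is a minimal NumPy sketch of an assumed equivalent, named cxcywh2ltrb_sketch to make clear it is not the original.

import numpy as np

def cxcywh2ltrb_sketch(boxes):
    # boxes: (N, 4) array of [cx, cy, w, h]; returns [left, top, right, bottom].
    out = boxes.copy()
    out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # left
    out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # top
    out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # right
    out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # bottom
    return out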