name = cfg.video.split('/')[-1]
    video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"), target_fps,
                                   (frame_width, frame_height))

    progress_bar = ProgressBar(40, num_frames)
    timer.reset()
    t_fps = 0

    for i in range(num_frames):
        if i == 1:
            timer.start()

        frame_origin = vid.read()[1]
        img_h, img_w = frame_origin.shape[0:2]
        frame_trans = val_aug(frame_origin, cfg.img_size)[None, :]

        with timer.counter('forward'):
            class_p, box_p, coef_p, proto_p, anchors = sess.run(None, {input_name: frame_trans})

        with timer.counter('nms'):
            ids_p, class_p, box_p, coef_p, proto_p = nms_numpy(class_p, box_p, coef_p, proto_p, anchors, cfg)

        with timer.counter('after_nms'):
            ids_p, class_p, boxes_p, masks_p = after_nms_numpy(ids_p, class_p, box_p, coef_p,
                                                               proto_p, img_h, img_w, cfg)

        with timer.counter('save_img'):
            frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin, cfg, fps=t_fps)

        if cfg.real_time:
Ejemplo n.º 2
0
def _report_progress(label, progress_bar, step, total):
    """Print the one-line timing/progress status and return the total fps.

    Args:
        label: Word printed at the start of the line ('Testing' or 'Detecting').
        progress_bar: Object whose ``get_bar(step)`` renders the bar text.
        step: 1-based index of the item that has just been processed.
        total: Total number of items, printed after the bar.

    Returns:
        ``1 / t_t``, where ``t_t`` is the 'batch' time reported by ``timer``.
    """
    t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward',
                                                        'nms', 'after_nms', 'save_img'])
    # 'fps' leaves out the save_img time; 't_fps' is derived from the whole batch time.
    fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
    bar_str = progress_bar.get_bar(step)
    print(f'\r{label}: {bar_str} {step}/{total}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
          f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
          f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')
    return t_fps


def _detect_images(net, cfg):
    """Run detection on every image of the 'detect' dataset and save the drawings.

    Each result is written to ``results/images/<img_name>``.
    """
    dataset = COCODetection(cfg, mode='detect')
    data_loader = data.DataLoader(dataset, 1, num_workers=2, shuffle=False, pin_memory=True,
                                  collate_fn=detect_collate)
    ds = len(data_loader)
    assert ds > 0, 'No .jpg images found.'
    progress_bar = ProgressBar(40, ds)
    timer.reset()
    prev_time = None

    for i, (img, img_origin, img_name) in enumerate(data_loader):
        # The timer is started on the second item, so the first one is not timed.
        if i == 1:
            timer.start()

        if cfg.cuda:
            img = img.cuda()

        img_h, img_w = img_origin.shape[0:2]

        with torch.no_grad(), timer.counter('forward'):
            class_p, box_p, coef_p, proto_p = net(img)

        with timer.counter('nms'):
            ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

        with timer.counter('after_nms'):
            ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                         img_h, img_w, cfg, img_name=img_name)

        with timer.counter('save_img'):
            img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name)
            cv2.imwrite(f'results/images/{img_name}', img_numpy)

        # Batch time is the wall-clock gap between the ends of two consecutive iterations.
        now = time.perf_counter()
        if prev_time is not None:
            timer.add_batch_time(now - prev_time)
            _report_progress('Testing', progress_bar, i + 1, ds)
        prev_time = now

    print('\nFinished, saved in: results/images.')


def _detect_video(net, cfg):
    """Run detection on the video at ``cfg.video``.

    With ``cfg.real_time`` the frames are shown in a window; otherwise they are
    written to ``results/videos/<name>``.

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    vid = cv2.VideoCapture(cfg.video)
    if not vid.isOpened():
        vid.release()
        raise RuntimeError(f'Cannot open video: {cfg.video}')

    video_writer = None
    try:
        target_fps = round(vid.get(cv2.CAP_PROP_FPS))
        frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        name = cfg.video.split('/')[-1]
        if not cfg.real_time:
            # Only open the output file when frames are actually written, so a
            # real-time run does not overwrite a previously saved result.
            video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"),
                                           target_fps, (frame_width, frame_height))

        progress_bar = ProgressBar(40, num_frames)
        timer.reset()
        t_fps = 0
        prev_time = None

        for i in range(num_frames):
            # The timer is started on the second frame, so the first one is not timed.
            if i == 1:
                timer.start()

            ok, frame_origin = vid.read()
            if not ok:
                # The reported frame count comes from container metadata and can
                # exceed the number of decodable frames; stop instead of crashing.
                print(f'\nStopped early: could not read frame {i + 1}/{num_frames}.')
                break

            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg.img_size)

            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w, cfg)

            with timer.counter('save_img'):
                # t_fps is the value computed for the previous frame (0 at the start).
                frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin, cfg, fps=t_fps)

            if cfg.real_time:
                cv2.imshow('Detection', frame_numpy)
                cv2.waitKey(1)
            else:
                video_writer.write(frame_numpy)

            # Batch time is the wall-clock gap between the ends of two consecutive iterations.
            now = time.perf_counter()
            if prev_time is not None:
                timer.add_batch_time(now - prev_time)
                t_fps = _report_progress('Detecting', progress_bar, i + 1, num_frames)
            prev_time = now

        if not cfg.real_time:
            print(f'\n\nFinished, saved in: results/videos/{name}')
    finally:
        # Release the capture and writer even when a frame fails half-way through.
        vid.release()
        if video_writer is not None:
            video_writer.release()


def main():
    """Parse the command line, load the YOLACT weights and run detection.

    The config name is taken from the weight file name, which must look like
    ``best_<score>_<cfg>_<iteration>.pth`` (e.g. ``best_30.5_res101_coco_392000.pth``
    gives ``res101_coco``).  Images are processed when ``--image`` is given,
    otherwise the video given by ``--video``; with neither, nothing is detected.
    """
    parser = argparse.ArgumentParser(description='YOLACT Detection.')
    parser.add_argument('--weight', default='weights/best_30.5_res101_coco_392000.pth', type=str)
    parser.add_argument('--image', default=None, type=str, help='The folder of images for detecting.')
    parser.add_argument('--video', default=None, type=str, help='The path of the video to evaluate.')
    parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')
    parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
    parser.add_argument('--hide_mask', default=False, action='store_true', help='Hide masks in results.')
    parser.add_argument('--hide_bbox', default=False, action='store_true', help='Hide boxes in results.')
    parser.add_argument('--hide_score', default=False, action='store_true', help='Hide scores in results.')
    parser.add_argument('--cutout', default=False, action='store_true', help='Cut out each object and save.')
    parser.add_argument('--save_lincomb', default=False, action='store_true', help='Show the generating process of masks.')
    parser.add_argument('--no_crop', default=False, action='store_true',
                        help='Do not crop the output masks with the predicted bounding box.')
    parser.add_argument('--real_time', default=False, action='store_true', help='Show the detection results real-timely.')
    parser.add_argument('--visual_thre', default=0.3, type=float,
                        help='Detections with a score under this threshold will be removed.')

    args = parser.parse_args()
    prefix = re.findall(r'best_\d+\.\d+_', args.weight)
    suffix = re.findall(r'_\d+\.pth', args.weight)
    if not prefix or not suffix:
        # Report a usable message instead of an IndexError on an unexpected file name.
        parser.error(f"--weight must be named like 'best_<score>_<cfg>_<iteration>.pth', got: {args.weight}")
    args.cfg = args.weight.split(prefix[0])[-1].split(suffix[0])[0]
    cfg = get_config(args, mode='detect')

    net = Yolact(cfg)
    net.load_weights(cfg.weight, cfg.cuda)
    net.eval()

    if cfg.cuda:
        cudnn.benchmark = True
        cudnn.fastest = True
        net = net.cuda()

    # detect images
    if cfg.image is not None:
        _detect_images(net, cfg)
    # detect videos
    elif cfg.video is not None:
        _detect_video(net, cfg)
Ejemplo n.º 3
0
    def __getitem__(self, index):
        """Load one sample; the returned tuple depends on ``self.mode``.

        * 'detect': ``(val_aug output, original image, file name)``.
        * 'train': ``(img, boxes, masks)`` after ``train_aug``, where ``boxes`` has
          the label appended as a last column; ``(None, None, None)`` when
          ``train_aug`` returns no image or no box passed the filter.
        * 'val': ``(img, boxes, masks, height, width)`` with boxes divided by the
          image width/height and the label appended as a last column.

        Raises:
            AssertionError: If the image file is missing, the image has no
                non-crowd annotation, or the mask shape does not match.
            RuntimeError: In 'val' mode when no box is kept.
        """
        mode = self.mode

        if mode == 'detect':
            path = self.image_path[index]
            raw = cv2.imread(path)
            return val_aug(raw, self.cfg.img_size), raw, path.split(osp.sep)[-1]

        img_id = self.ids[index]

        # Each annotation includes {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'};
        # crowd annotations are dropped.
        annotations = []
        for ann in self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id)):
            if not ann['iscrowd']:
                annotations.append(ann)

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img_path = osp.join(self.image_path, file_name)
        assert osp.exists(img_path), f'Image path does not exist: {img_path}'

        img = cv2.imread(img_path)
        height, width, _ = img.shape

        assert len(annotations) > 0, 'No annotation in this image!'

        box_list = []
        mask_list = []
        label_list = []
        for ann in annotations:
            bbox = ann['bbox']

            # When training, some boxes are wrong (negative origin or a side
            # shorter than 4), ignore them.
            if mode == 'train' and (bbox[0] < 0 or bbox[1] < 0 or bbox[2] < 4 or bbox[3] < 4):
                continue

            # bbox is used as [x, y, w, h]; convert it to [x1, y1, x2, y2].
            corners = np.array([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            label = self.continuous_id[ann['category_id']] - 1

            box_list.append(corners)
            mask_list.append(self.coco.annToMask(ann))
            label_list.append(label)

        if len(box_list) == 0:
            if mode == 'val':
                raise RuntimeError('Error, no valid object in this image.')
            print(f'No valid object in image: {img_id}. Use a repeated image in this batch.')
            return None, None, None

        boxes = np.array(box_list)
        masks = np.stack(mask_list, axis=0)
        labels = np.array(label_list)
        expected_shape = (boxes.shape[0], height, width)
        assert masks.shape == expected_shape, 'Unmatched annotations.'

        if mode == 'train':
            img, masks, boxes, labels = train_aug(img, masks, boxes, labels, self.cfg.img_size)
            if img is None:
                return None, None, None
            label_column = np.expand_dims(labels, axis=1)
            boxes = np.hstack((boxes, label_column))
            return img, boxes, masks

        if mode == 'val':
            img = val_aug(img, self.cfg.img_size)
            scale = np.array([width, height, width, height])
            boxes = boxes / scale  # to 0~1 scale
            label_column = np.expand_dims(labels, axis=1)
            boxes = np.hstack((boxes, label_column))
            return img, boxes, masks, height, width

        # Any other mode yields None, as in the original fall-through.
        return None
Ejemplo n.º 4
0
        name = cfg.video.split('/')[-1]
        video_writer = cv2.VideoWriter(f'results/videos/{name}',
                                       cv2.VideoWriter_fourcc(*"mp4v"),
                                       target_fps, (frame_width, frame_height))

        progress_bar = ProgressBar(40, num_frames)
        timer.reset()
        t_fps = 0

        for i in range(num_frames):
            if i == 1:
                timer.start()

            frame_origin = vid.read()[1]
            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg)

            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with timer.counter('forward'):
                net_outs = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                nms_outs = nms(cfg, net_outs)

            with timer.counter('after_nms'):
                results = after_nms(nms_outs, img_h, img_w, cfg)

            with timer.counter('save_img'):