Example #1
def evaluate(net,
             dataset,
             max_num=-1,
             during_training=False,
             cocoapi=False,
             traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    # For each class and iou, stores tuples (score, isPositive)
    # Index ap_data[type][iouIdx][classIdx]
    ap_data = {
        'box': [[APDataObject() for _ in cfg.dataset.class_names]
                for _ in iou_thresholds],
        'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                 for _ in iou_thresholds]
    }
    make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = img.unsqueeze(0)
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            # Forward pass, NMS, and metric accumulation share one timer env here.
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing:  %s  %d / %d (%.2f%%)  %.2f fps  ' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps),
              end='')

    table, box_row, mask_row = calc_map(ap_data)
    print(table)
    return table, box_row, mask_row
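
The ap_data grid above holds one APDataObject per (IoU threshold, class) pair. A minimal sketch of the interface it relies on, following upstream YOLACT's eval code in spirit (forks may differ in details): push (score, isPositive) pairs per detection, count ground-truth positives, then integrate a COCO-style 101-point precision-recall curve.

import bisect

class APDataObject:
    """Holds (score, is_positive) pairs for one class at one IoU threshold."""

    def __init__(self):
        self.data_points = []      # (confidence, whether the detection matched a ground truth)
        self.num_gt_positives = 0  # number of ground-truth instances for this class

    def push(self, score, is_true):
        self.data_points.append((score, is_true))

    def add_gt_positives(self, num_positives):
        self.num_gt_positives += num_positives

    def get_ap(self):
        if self.num_gt_positives == 0:
            return 0

        # Sweep detections by descending confidence to trace the PR curve.
        self.data_points.sort(key=lambda x: -x[0])
        precisions, recalls = [], []
        num_true, num_false = 0, 0
        for _, is_true in self.data_points:
            if is_true:
                num_true += 1
            else:
                num_false += 1
            precisions.append(num_true / (num_true + num_false))
            recalls.append(num_true / self.num_gt_positives)

        # Make the precision envelope monotonically decreasing from right to left.
        for j in range(len(precisions) - 1, 0, -1):
            precisions[j - 1] = max(precisions[j - 1], precisions[j])

        # Average the interpolated precision over 101 recall points (COCO-style).
        ap = 0.0
        for r in (x / 100 for x in range(101)):
            idx = bisect.bisect_left(recalls, r)
            ap += precisions[idx] if idx < len(precisions) else 0
        return ap / 101
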
Example #2
def savevideo(net: Yolact, in_path: str, out_path: str):

    vid = cv2.VideoCapture(in_path)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                          target_fps, (frame_width, frame_height))
    frame_freq = 10
    transform = FastBaseTransform()
    frame_times = MovingAverage()
    progress_bar = ProgressBar(30, num_frames)
    preds = None
    try:
        for i in range(num_frames):
            timer.reset()
            with timer.env('Video'):
                # Run the network only on every frame_freq-th frame and reuse its
                # predictions in between; frame 0 is always processed (0 % frame_freq == 0).
                frame = torch.from_numpy(vid.read()[1]).cuda().float()
                # need to adjust for multi frame
                if i % frame_freq == 0:
                    batch = transform(frame.unsqueeze(0))
                    preds = net(batch)
                current_preds = make_copy(preds)
                processed = prep_display(current_preds,
                                         frame,
                                         None,
                                         None,
                                         undo_transform=False,
                                         class_color=True)
                out.write(processed)
            if i > 1:
                frame_times.add(timer.total_time())
                fps = 1 / frame_times.get_avg()
                progress = (i + 1) / num_frames * 100
                progress_bar.set_val(i + 1)

                print(
                    '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    % (repr(progress_bar), i + 1, num_frames, progress, fps),
                    end='')
    except KeyboardInterrupt:
        print('Stopping early.')

    vid.release()
    out.release()
    print()
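
frame_times is a MovingAverage that smooths the per-frame wall time before inverting it into an FPS figure. A minimal sketch of such a windowed mean (the repo's class may differ, e.g., in window size):

from collections import deque

class MovingAverage:
    """Mean of the last max_window_size values; used above for frame times."""

    def __init__(self, max_window_size=1000):
        self.window = deque(maxlen=max_window_size)

    def add(self, elem):
        self.window.append(elem)

    def get_avg(self):
        # Guard against division by zero before the first add().
        return sum(self.window) / max(len(self.window), 1)
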
Example #3
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    # TODO: we currently do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(
        args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids
        #
        # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
        # the order of insertion. That means on python 3.6, the images come in the order they
        # are in the annotations file. For some reason, the first images in the annotations file are
        # the hardest. To combat this, I use a hard-coded hash function based on the image ids
        # to shuffle the indices we use. That way, no matter what python version or how pycocotools
        # handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        dataset.batch_size = 1
        dataset.num_workers = 1
        for it, batch in enumerate(dataset):
            timer.reset()
            image_idx, img, gt, gt_masks, h, w, num_crowd = batch[0]

            if not args.benchmark:
                gt = gt.numpy()
                gt_masks = gt_masks.numpy()
            batch = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
            # batch = jt.array([img])

            with timer.env('Network Extra'):
                preds = net(batch)

            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w,
                             num_crowd, dataset.ids[image_idx], detections)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                if it > 1: fps = 1 / frame_times.get_avg()
                else: fps = 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print(
                    '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    %
                    (repr(progress_bar), it + 1, dataset_size, progress, fps),
                    end='')

        jt.sync_all(True)

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections...')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data..')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)

                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))
    except KeyboardInterrupt:
        print('Stopping...')
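
The deterministic shuffle described in the comment above hinges on badhash. In upstream YOLACT this is a small 32-bit integer mixing function, reproduced here as a sketch (your fork may differ):

def badhash(x):
    # Quick-and-dirty deterministic hash over image ids, so the eval order is
    # stable across Python versions and pycocotools' iteration order.
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = ((x >> 16) ^ x) & 0xFFFFFFFF
    return x
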
Example #4
args = parser.parse_args()
args.cfg = re.findall(r'res.+_[a-z]+', args.weight)[0]
cfg = get_config(args, mode='detect')

sess = ort.InferenceSession(cfg.weight)
input_name = sess.get_inputs()[0].name

# detect images
if cfg.image is not None:
    dataset = COCODetection(cfg, mode='detect')
    data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False,
                                  pin_memory=True, collate_fn=detect_onnx_collate)
    ds = len(data_loader)
    assert ds > 0, 'No .jpg images found.'
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    for i, (img, img_origin, img_name) in enumerate(data_loader):
        if i == 1:
            timer.start()

        img_h, img_w = img_origin.shape[0:2]

        with timer.counter('forward'):
            class_p, box_p, coef_p, proto_p, anchors = sess.run(None, {input_name: img})

        with timer.counter('nms'):
            ids_p, class_p, box_p, coef_p, proto_p = nms_numpy(class_p, box_p, coef_p, proto_p, anchors, cfg)

        with timer.counter('after_nms'):
            ids_p, class_p, boxes_p, masks_p = after_nms_numpy(ids_p, class_p, box_p, coef_p,
                                                               proto_p, img_h, img_w, cfg)
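
Example #4 drives an exported YOLACT model through ONNX Runtime. The session pattern in miniature (a self-contained sketch; the model path and input shape are assumptions, not the repo's values):

import numpy as np
import onnxruntime as ort

# Hypothetical model path; YOLACT-style exports take one NCHW image tensor.
sess = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name

# sess.run(None, feed) returns every graph output as a numpy array, in
# declaration order (above: class_p, box_p, coef_p, proto_p, anchors).
dummy = np.zeros((1, 3, 544, 544), dtype=np.float32)  # assumed img_size of 544
outputs = sess.run(None, {input_name: dummy})
print([o.shape for o in outputs])
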
Example #5
args = parser.parse_args()
cfg = get_config(args)

model = PAA(cfg)
model.train().cuda()  # broadcast_buffers is True if BN is used
model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank, broadcast_buffers=False)

# if cfg.MODEL.USE_SYNCBN:  # TODO: figure this out
#     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

optim = Optimizer(model, cfg)
checkpointer = Checkpointer(cfg, model.module, optim.optimizer)
start_iter = int(cfg.resume.split('_')[-1].split('.')[0]) if cfg.resume else 0
data_loader = make_data_loader(cfg, start_iter=start_iter)
max_iter = len(data_loader) - 1
timer.reset()
main_gpu = dist.get_rank() == 0
num_gpu = dist.get_world_size()

for i, (img_list_batch, box_list_batch) in enumerate(data_loader, start_iter):
    if main_gpu and i == start_iter + 1:
        timer.start()

    optim.update_lr(step=i)

    img_tensor_batch = torch.stack([aa.img for aa in img_list_batch], dim=0).cuda()
    for box_list in box_list_batch:
        box_list.to_cuda()

    with timer.counter('for+loss'):
        category_loss, box_loss, iou_loss = model(img_tensor_batch, box_list_batch)
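
The snippet ends mid-iteration. A typical continuation of such a DDP training step (an illustrative assumption, not this repo's code) sums the partial losses, backpropagates, and applies the optimizer:

    with timer.counter('backward'):
        # DDP averages gradients across GPUs during backward().
        loss_total = category_loss + box_loss + iou_loss
        optim.optimizer.zero_grad()
        loss_total.backward()

    with timer.counter('update'):
        optim.optimizer.step()
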
Example #6
def evaluate(net, cfg, step=None):
    dataset = COCODetection(cfg, mode='val')
    data_loader = data.DataLoader(dataset,
                                  1,
                                  num_workers=4,
                                  shuffle=False,
                                  pin_memory=True,
                                  collate_fn=val_collate)
    ds = len(data_loader)
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    ap_data = {
        'box': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres],
        'mask': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres]
    }
    make_json = Make_json()  # needed below when cfg.coco_api is set (as in the other examples)

    with torch.no_grad():
        for i, (img, gt, gt_masks, img_h, img_w) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img, gt, gt_masks = img.cuda(), gt.cuda(), gt_masks.cuda()

            with timer.counter('forward'):
                class_p, box_p, coef_p, proto_p, anchors = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(
                    class_p, box_p, coef_p, proto_p, anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(
                    ids_p, class_p, box_p, coef_p, proto_p, img_h, img_w)
                if ids_p is None:
                    continue

            with timer.counter('metric'):
                ids_p = list(ids_p.cpu().numpy().astype(int))
                class_p = list(class_p.cpu().numpy().astype(float))

                if cfg.coco_api:
                    boxes_p = boxes_p.cpu().numpy()
                    masks_p = masks_p.cpu().numpy()

                    for j in range(masks_p.shape[0]):
                        if (boxes_p[j, 3] - boxes_p[j, 1]) * (
                                boxes_p[j, 2] - boxes_p[j, 0]) > 0:
                            make_json.add_bbox(dataset.ids[i], ids_p[j],
                                               boxes_p[j, :], class_p[j])
                            make_json.add_mask(dataset.ids[i], ids_p[j],
                                               masks_p[j, :, :], class_p[j])
                else:
                    prep_metrics(ap_data, ids_p, class_p, boxes_p, masks_p, gt,
                                 gt_masks, img_h, img_w, iou_thres)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_me = timer.get_times(
                    ['batch', 'data', 'forward', 'nms', 'after_nms', 'metric'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(
                    f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                    f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                    f't_after_nms: {t_an:.3f} | t_metric: {t_me:.3f}',
                    end='')

        if cfg.coco_api:
            make_json.dump()
            print('\nJson files dumped, saved in: \'results/\', starting evaluation.')

            gt_annotations = COCO(cfg.val_ann)
            bbox_dets = gt_annotations.loadRes('results/bbox_detections.json')
            mask_dets = gt_annotations.loadRes('results/mask_detections.json')

            print('\nEvaluating BBoxes:')
            bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
            bbox_eval.evaluate()
            bbox_eval.accumulate()
            bbox_eval.summarize()

            print('\nEvaluating Masks:')
            mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
            mask_eval.evaluate()
            mask_eval.accumulate()
            mask_eval.summarize()
        else:
            table, box_row, mask_row = calc_map(ap_data,
                                                iou_thres,
                                                len(cfg.class_names),
                                                step=step)
            print(table)
            return table, box_row, mask_row
Example #7
def evaluate(net: STMask, dataset):
    net.detect.use_fast_nms = args.fast_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) /
                             args.batch_size) if args.max_images < 0 else min(
                                 args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()

            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
            pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images,
                            img_meta=images_meta,
                            ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

            # Perform the meat of the operation here depending on our mode.
            if it == dataset_size - 1:
                batch_size = len(dataset) % args.batch_size
                if batch_size == 0:  # the dataset divides evenly, so the last batch is full
                    batch_size = args.batch_size
            else:
                batch_size = images.size(0)

            for batch_id in range(batch_size):
                if args.display:
                    img_id = (images_meta[batch_id]['video_id'],
                              images_meta[batch_id]['frame_id'])
                    if not cfg.display_mask_single:
                        img_numpy = prep_display(
                            preds[batch_id],
                            images[batch_id],
                            pad_h,
                            pad_w,
                            img_meta=images_meta[batch_id],
                            img_ids=img_id)
                    else:
                        for p in range(
                                preds[batch_id]['detection']['box'].size(0)):
                            preds_single = {'detection': {}}
                            for k in preds[batch_id]['detection']:
                                if preds[batch_id]['detection'][
                                        k] is not None and k not in {'proto'}:
                                    preds_single['detection'][k] = preds[
                                        batch_id]['detection'][k][p]
                                else:
                                    preds_single['detection'][k] = None
                            preds_single['net'] = preds[batch_id]['net']
                            preds_single['detection'][
                                'box_ids'] = torch.tensor(-1)

                            img_numpy = prep_display(
                                preds_single,
                                images[batch_id],
                                pad_h,
                                pad_w,
                                img_meta=images_meta[batch_id],
                                img_ids=img_id)
                            plt.imshow(img_numpy)
                            plt.axis('off')
                            plt.savefig(''.join([
                                args.mask_det_file[:-12], 'out_single/',
                                str(img_id), '_',
                                str(p), '.png'
                            ]))
                            plt.clf()

                else:
                    cfg.preserve_aspect_ratio = True
                    preds_cur = postprocess_ytbvis(
                        preds[batch_id],
                        pad_h,
                        pad_w,
                        images_meta[batch_id],
                        score_threshold=cfg.eval_conf_thresh)
                    segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                    results.append(segm_results)

                # First couple of images take longer because we're constructing the graph.
                # Since that's technically initialization, don't include those in the FPS calculations.
                if it > 1:
                    frame_times.add(timer.total_time() / batch_size)

                if args.display and not cfg.display_mask_single:
                    if it > 1:
                        print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                    plt.imshow(img_numpy)
                    plt.axis('off')
                    plt.title(str(img_id))

                    root_dir = ''.join([
                        args.mask_det_file[:-12], 'out/',
                        str(images_meta[batch_id]['video_id']), '/'
                    ])
                    if not os.path.exists(root_dir):
                        os.makedirs(root_dir)
                    plt.savefig(''.join([
                        root_dir,
                        str(images_meta[batch_id]['frame_id']), '.png'
                    ]))
                    plt.clf()
                    # plt.show()
                elif not args.no_bar:
                    if it > 1: fps = 1 / frame_times.get_avg()
                    else: fps = 0
                    progress = (it + 1) / dataset_size * 100
                    progress_bar.set_val(it + 1)
                    print(
                        '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                        % (repr(progress_bar), it + 1, dataset_size, progress,
                           fps),
                        end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_json:
                print('Dumping detections...')
                results2json_videoseg(dataset, results, args.mask_det_file)

                if cfg.use_valid_sub or cfg.use_train_sub:
                    if cfg.use_valid_sub:
                        print('calculate evaluation metrics ...')
                        ann_file = cfg.valid_sub_dataset.ann_file
                    else:
                        print('calculate train_sub metrics ...')
                        ann_file = cfg.train_dataset.ann_file
                    dt_file = args.mask_det_file
                    metrics = calc_metrics(ann_file, dt_file)

                    return metrics

        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))

    except KeyboardInterrupt:
        print('Stopping...')
Example #8
def validation(net: STMask, valid_data=False, output_metrics_file=None):
    cfg.mask_proto_debug = args.mask_proto_debug
    if not valid_data:
        cfg.valid_sub_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_sub_dataset)
    else:
        cfg.valid_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_dataset)

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) /
                             args.batch_size) if args.max_images < 0 else min(
                                 args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()
    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()
            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
                pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images,
                            img_meta=images_meta,
                            ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

                if it == dataset_size - 1:
                    batch_size = len(dataset) % args.batch_size
                else:
                    batch_size = images.size(0)

                for batch_id in range(batch_size):
                    cfg.preserve_aspect_ratio = True
                    preds_cur = postprocess_ytbvis(
                        preds[batch_id],
                        pad_h,
                        pad_w,
                        images_meta[batch_id],
                        score_threshold=cfg.eval_conf_thresh)
                    segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                    results.append(segm_results)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                if batch_size == 0:
                    batch_size = 1
                frame_times.add(timer.total_time() / batch_size)

            if it > 1 and frame_times.get_avg() > 0:
                fps = 1 / frame_times.get_avg()
            else:
                fps = 0
            progress = (it + 1) / dataset_size * 100
            progress_bar.set_val(it + 1)
            print(
                '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                % (repr(progress_bar), it + 1, dataset_size, progress, fps),
                end='')

        print()
        print('Dumping detections...')

        if not valid_data:
            results2json_videoseg(dataset, results, args.mask_det_file)
            print('calculate evaluation metrics ...')
            ann_file = cfg.valid_sub_dataset.ann_file
            dt_file = args.mask_det_file
            calc_metrics(ann_file, dt_file, output_file=output_metrics_file)
        else:
            results2json_videoseg(dataset, results,
                                  output_metrics_file.replace('.txt', '.json'))

    except KeyboardInterrupt:
        print('Stopping...')
Example #9
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            savevideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(
        args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        for it, image_idx in enumerate(dataset_indices):
            timer.reset()

            with timer.env('Load Data'):
                img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(
                    image_idx)

                # Test flag, do not upvote
                if cfg.mask_proto_debug:
                    with open('scripts/info.txt', 'w') as f:
                        f.write(str(dataset.ids[image_idx]))
                    np.save('scripts/gt.npy', gt_masks)

                batch = Variable(img.unsqueeze(0))
                if args.cuda:
                    batch = batch.cuda()

            with timer.env('Network Extra'):
                preds = net(batch)

            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w,
                             num_crowd, dataset.ids[image_idx], detections)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                if it > 1: fps = 1 / frame_times.get_avg()
                else: fps = 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print(
                    '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    %
                    (repr(progress_bar), it + 1, dataset_size, progress, fps),
                    end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections...')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data...')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)

                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))

    except KeyboardInterrupt:
        print('Stopping...')
Example #10
def inference(model, cfg, during_training=False):
    model.eval()
    predictions, coco_results = {}, []
    val_loader = make_data_loader(cfg, during_training=during_training)
    dataset = val_loader.dataset
    dl = len(val_loader)
    bar = ProgressBar(length=40, max_val=dl)
    timer.reset()

    with torch.no_grad():
        for i, (img_list_batch, _) in enumerate(val_loader):
            if i == 1:
                timer.start()

            with timer.counter('forward'):
                img_tensor_batch = torch.stack(
                    [aa.img for aa in img_list_batch], dim=0).cuda()
                c_pred, box_pred, iou_pred, anchors = model(img_tensor_batch)

            with timer.counter('post_process'):
                resized_size = [aa.resized_size for aa in img_list_batch]
                pred_batch = post_process(cfg, c_pred, box_pred, iou_pred,
                                          anchors, resized_size)

            with timer.counter('accumulate'):
                for pred in pred_batch:
                    pred.to_cpu()

                for img_list, pred in zip(img_list_batch, pred_batch):
                    if pred.box.shape[0] == 0:
                        continue

                    original_id = dataset.id_img_map[img_list.id]
                    pred.resize(img_list.ori_size)
                    pred.convert_mode("x1y1wh")

                    boxes = pred.box.tolist()
                    score = pred.score.tolist()
                    label = pred.label.tolist()

                    mapped_labels = [dataset.to_category_id[lbl] for lbl in label]
                    coco_results.extend([{
                        "image_id": original_id,
                        "category_id": mapped_labels[k],
                        "bbox": box,
                        "score": score[k]
                    } for k, box in enumerate(boxes)])

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)

                time_name = [
                    'batch', 'data', 'forward', 'post_process', 'accumulate'
                ]
                t_t, t_d, t_f, t_pp, t_acc = timer.get_times(time_name)
                fps, t_fps = 1 / (t_d + t_f + t_pp), 1 / t_t
                bar_str = bar.get_bar(i + 1)
                print(
                    f'\rTesting: {bar_str} {i + 1}/{dl}, fps: {fps:.2f} | total fps: {t_fps:.2f} | t_t: {t_t:.3f} | '
                    f't_d: {t_d:.3f} | t_f: {t_f:.3f} | t_pp: {t_pp:.3f} | t_acc: {t_acc:.3f}',
                    end='')

            temp = aa

    print('\n\nTest ended, doing evaluation...')

    json_name = cfg.weight.split('/')[-1].split('.')[0]
    file_path = f'results/{json_name}.json'
    with open(file_path, "w") as f:
        json.dump(coco_results, f)

    coco_dt = dataset.coco.loadRes(file_path)

    if cfg.val_api == 'Improved COCO':
        from my_cocoeval.cocoeval import SelfEval
        bbox_eval = SelfEval(dataset.coco, coco_dt, all_points=True)
    else:
        from pycocotools.cocoeval import COCOeval
        bbox_eval = COCOeval(dataset.coco, coco_dt, iouType='bbox')

    bbox_eval.evaluate()
    bbox_eval.accumulate()
    bbox_eval.summarize()

    if not during_training:
        if cfg.val_api == 'Improved COCO':
            bbox_eval.draw_curve()
        else:
            compute_thre_per_class(bbox_eval)
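
Each entry appended to coco_results above follows the standard COCO detection-results format, which is why dataset.coco.loadRes() can consume the dumped file directly. One record looks like this (values illustrative):

record = {
    'image_id': 139,                      # original image id from the annotations
    'category_id': 62,                    # category id after the to_category_id mapping
    'bbox': [412.8, 157.6, 53.1, 138.0],  # x1, y1, width, height ('x1y1wh' mode, as converted above)
    'score': 0.98,
}
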
Example #11
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    # TODO: we currently do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
    if args.image is not None:
        if ":" in args.image:
            inp, out = args.image.split(":")
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(":")
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ":" in args.video:
            inp, out = args.video.split(":")
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = (
        len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
    )
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            "box": [
                [APDataObject() for _ in cfg.dataset.class_names]
                for _ in iou_thresholds
            ],
            "mask": [
                [APDataObject() for _ in cfg.dataset.class_names]
                for _ in iou_thresholds
            ],
        }
        detections = Detections()
    else:
        timer.disable("Load Data")

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids
        #
        # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
        # the order of insertion. That means on python 3.6, the images come in the order they
        # are in the annotations file. For some reason, the first images in the annotations file are
        # the hardest. To combat this, I use a hard-coded hash function based on the image ids
        # to shuffle the indices we use. That way, no matter what python version or how pycocotools
        # handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        for it, image_idx in enumerate(dataset_indices):
            timer.reset()

            with timer.env("Load Data"):
                img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

                # Test flag, do not upvote
                if cfg.mask_proto_debug:
                    with open("scripts/info.txt", "w") as f:
                        f.write(str(dataset.ids[image_idx]))
                    np.save("scripts/gt.npy", gt_masks)

                batch = Variable(img.unsqueeze(0))
                if args.cuda:
                    batch = batch.cuda()

            with timer.env("Network Extra"):
                preds = net(batch)
            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(
                    ap_data,
                    preds,
                    img,
                    gt,
                    gt_masks,
                    h,
                    w,
                    num_crowd,
                    dataset.ids[image_idx],
                    detections,
                )

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print("Avg FPS: %.4f" % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                if it > 1:
                    fps = 1 / frame_times.get_avg()
                else:
                    fps = 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print(
                    "\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        "
                    % (repr(progress_bar), it + 1, dataset_size, progress, fps),
                    end="",
                )

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print("Dumping detections...")
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print("Saving data...")
                    with open(args.ap_data_file, "wb") as f:
                        pickle.dump(ap_data, f)

                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print("Stats for the last frame:")
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print(
                "Average: %5.2f fps, %5.2f ms"
                % (1 / frame_times.get_avg(), 1000 * avg_seconds)
            )

    except KeyboardInterrupt:
        print("Stopping...")
Example #12
def main():
    parser = argparse.ArgumentParser(description='YOLACT Detection.')
    parser.add_argument('--weight', default='weights/best_30.5_res101_coco_392000.pth', type=str)
    parser.add_argument('--image', default=None, type=str, help='The folder of images for detecting.')
    parser.add_argument('--video', default=None, type=str, help='The path of the video to evaluate.')
    parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')
    parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
    parser.add_argument('--hide_mask', default=False, action='store_true', help='Hide masks in results.')
    parser.add_argument('--hide_bbox', default=False, action='store_true', help='Hide boxes in results.')
    parser.add_argument('--hide_score', default=False, action='store_true', help='Hide scores in results.')
    parser.add_argument('--cutout', default=False, action='store_true', help='Cut out each object and save.')
    parser.add_argument('--save_lincomb', default=False, action='store_true', help='Show the generating process of masks.')
    parser.add_argument('--no_crop', default=False, action='store_true',
                        help='Do not crop the output masks with the predicted bounding box.')
    parser.add_argument('--real_time', default=False, action='store_true', help='Show the detection results in real time.')
    parser.add_argument('--visual_thre', default=0.3, type=float,
                        help='Detections with a score under this threshold will be removed.')

    args = parser.parse_args()
    prefix = re.findall(r'best_\d+\.\d+_', args.weight)[0]
    suffix = re.findall(r'_\d+\.pth', args.weight)[0]
    args.cfg = args.weight.split(prefix)[-1].split(suffix)[0]
    cfg = get_config(args, mode='detect')

    net = Yolact(cfg)
    net.load_weights(cfg.weight, cfg.cuda)
    net.eval()

    if cfg.cuda:
        cudnn.benchmark = True
        cudnn.fastest = True
        net = net.cuda()

    # detect images
    if cfg.image is not None:
        dataset = COCODetection(cfg, mode='detect')
        data_loader = data.DataLoader(dataset, 1, num_workers=2, shuffle=False, pin_memory=True, collate_fn=detect_collate)
        ds = len(data_loader)
        assert ds > 0, 'No .jpg images found.'
        progress_bar = ProgressBar(40, ds)
        timer.reset()

        for i, (img, img_origin, img_name) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img = img.cuda()

            img_h, img_w = img_origin.shape[0:2]

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                            img_h, img_w, cfg, img_name=img_name)

            with timer.counter('save_img'):
                img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name)
                cv2.imwrite(f'results/images/{img_name}', img_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward',
                                                                    'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                    f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                    f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        print('\nFinished, saved in: results/images.')

    # detect videos
    elif cfg.video is not None:
        vid = cv2.VideoCapture(cfg.video)

        target_fps = round(vid.get(cv2.CAP_PROP_FPS))
        frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        name = cfg.video.split('/')[-1]
        video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"), target_fps,
                                    (frame_width, frame_height))

        progress_bar = ProgressBar(40, num_frames)
        timer.reset()
        t_fps = 0

        for i in range(num_frames):
            if i == 1:
                timer.start()

            frame_origin = vid.read()[1]
            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg.img_size)

            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p, img_h, img_w, cfg)

            with timer.counter('save_img'):
                frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin, cfg, fps=t_fps)

            if cfg.real_time:
                cv2.imshow('Detection', frame_numpy)
                cv2.waitKey(1)
            else:
                video_writer.write(frame_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward',
                                                                    'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rDetecting: {bar_str} {i + 1}/{num_frames}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                    f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                    f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        if not cfg.real_time:
            print(f'\n\nFinished, saved in: results/videos/{name}')

        vid.release()
        video_writer.release()
Example #13
def savevideo(net: Yolact, in_path: str, out_path: str):

    vid = cv2.VideoCapture(in_path)

    target_fps   = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width  = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames   = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    print("target_fps:{} frame_width:{} frame_height:{} num_frames:{}".format(target_fps, frame_width, frame_height, num_frames))

    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (frame_width, frame_height))
    frame_freq = 10
    transform = FastBaseTransform()
    frame_times = MovingAverage()
    progress_bar = ProgressBar(30, num_frames)
    preds = None
    drawing = True
    start_pt = None
    end_pt = None

    # load video here
    ret, frame = vid.read()

    # Mouse callback: the first left-click sets the top-left corner of the crop,
    # the second sets the bottom-right corner and closes the window.
    def draw(event, x, y, flags, params):
        nonlocal start_pt, end_pt, drawing
        if event == cv2.EVENT_LBUTTONDOWN:
            if start_pt is None:
                start_pt = [x, y]
            else:
                end_pt = [x, y]
                cv2.destroyAllWindows()
                drawing = False
            print(x, y)

    # Making Window For The Image
    cv2.namedWindow("Window")

    # Adding Mouse CallBack Event
    cv2.setMouseCallback("Window",draw)

    cv2.imshow("Window",frame)
    cv2.waitKey(0)
    print(start_pt,end_pt)

    try:
        for i in range(num_frames):
            timer.reset()
            with timer.env('Video'):
                # Run the network only on every frame_freq-th frame and reuse its
                # predictions in between; frame 0 is always processed.
                ret, full_frame = vid.read()
                if ret:
                    frame = full_frame[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0]].copy()
                    frame = torch.from_numpy(frame).cuda().float()
                    # need to adjust for multi frame
                    if i % frame_freq == 0:
                        batch = transform(frame.unsqueeze(0))
                        preds = net(batch)
                    current_preds = make_copy(preds)
                    processed = prep_display(current_preds, frame, None, None,
                                             undo_transform=False, class_color=True)
                    full_frame[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0]] = processed
                    out.write(full_frame)
            if i > 1:
                frame_times.add(timer.total_time())
                fps = 1 / frame_times.get_avg()
                progress = (i + 1) / num_frames * 100
                progress_bar.set_val(i + 1)

                print('\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                      % (repr(progress_bar), i + 1, num_frames, progress, fps), end='')
    except KeyboardInterrupt:
        print('Stopping early.')
    except KeyboardInterrupt:
        print('Stopping early.')

    vid.release()
    out.release()
    print()
Example #14
# Not sure if this helps. Note that shuffle must be False when a sampler is
# specified, hence shuffle=(train_sampler is None) below.
data_loader = data.DataLoader(dataset,
                              cfg.bs_per_gpu,
                              num_workers=cfg.bs_per_gpu // 2,
                              shuffle=(train_sampler is None),
                              collate_fn=train_collate,
                              pin_memory=False,
                              sampler=train_sampler)
# data_loader = data.DataLoader(dataset, cfg.bs_per_gpu, num_workers=0, shuffle=False,
#                               collate_fn=train_collate, pin_memory=True)

epoch_seed = 0
map_tables = []
training = True
timer.reset()
step = start_step
val_step = start_step
writer = SummaryWriter(f'tensorboard_log/{cfg_name}')

try:  # the try-except lets Ctrl + C shut down all processes cleanly.
    while training:
        if train_sampler:
            epoch_seed += 1
            train_sampler.set_epoch(epoch_seed)

        for images, targets, masks in data_loader:
            if cfg.warmup_until > 0 and step <= cfg.warmup_until:  # warm up learning rate.
                for param_group in optimizer.param_groups:
                    param_group['lr'] = (cfg.lr - cfg.warmup_init) * (
                        step / cfg.warmup_until) + cfg.warmup_init
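
The warm-up above interpolates the learning rate linearly from cfg.warmup_init to cfg.lr over the first cfg.warmup_until steps. A quick worked check of the expression with illustrative values:

lr, warmup_init, warmup_until = 1e-2, 1e-4, 500

def warmup_lr(step):
    # Same expression as in the loop above.
    return (lr - warmup_init) * (step / warmup_until) + warmup_init

print(warmup_lr(0))    # 0.0001  -> starts at warmup_init
print(warmup_lr(250))  # 0.00505 -> halfway between the two
print(warmup_lr(500))  # 0.01    -> reaches the target lr
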
Example #15
def evaluate(net,
             dataset,
             max_num=-1,
             during_training=False,
             benchmark=False,
             cocoapi=False,
             traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    if benchmark:
        timer.disable('Data loading')
    else:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = Variable(img.unsqueeze(0))
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)

        if benchmark:
            prep_benchmark(nms_outs, h, w)
        else:
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing:  %s  %d / %d (%.2f%%)  %.2f fps  ' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps),
              end='')

    if benchmark:
        print('\n\nStats for the last frame:')
        timer.print_stats()
        avg_seconds = frame_times.get_avg()
        print('Average: %5.2f fps, %5.2f ms' %
              (1 / frame_times.get_avg(), 1000 * avg_seconds))

    else:
        if cocoapi:
            make_json.dump()
            print(f'\nJson files dumped, saved in: {json_path}, starting evaluation.')

            gt_annotations = COCO(cfg.dataset.valid_info)
            bbox_dets = gt_annotations.loadRes(
                f'{json_path}/bbox_detections.json')
            mask_dets = gt_annotations.loadRes(
                f'{json_path}/mask_detections.json')

            print('\nEvaluating BBoxes:')
            bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
            bbox_eval.evaluate()
            bbox_eval.accumulate()
            bbox_eval.summarize()

            print('\nEvaluating Masks:')
            mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
            mask_eval.evaluate()
            mask_eval.accumulate()
            mask_eval.summarize()
            return

        table, box_row, mask_row = calc_map(ap_data)
        print(table)
        return table, box_row, mask_row
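
A usage note on the COCOeval calls in the cocoapi branch above: after summarize(), pycocotools stores the twelve printed numbers in the evaluator's stats array, which is convenient for logging:

# After bbox_eval.summarize() (likewise for the mask evaluator):
map_50_95 = bbox_eval.stats[0]  # AP @ IoU=0.50:0.95, the usual headline mAP
map_50    = bbox_eval.stats[1]  # AP @ IoU=0.50
print('bbox mAP: %.3f' % map_50_95)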