Example #1
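These snippets are excerpted from a single detection codebase; helpers such as init_detector, inference_detector, imread, parse_det_result, vis_det, eval_ccf, decode_mask, result_from_ccf, ltrb2ltwh_/ltwh2ltrb_, dist_from_dict, mkdir2, and print_stats are assumed to be provided by that codebase, along with the usual imports (numpy as np, torch, pickle, sys, traceback, time.perf_counter, os.path.join/isfile/basename, tqdm, and a COCO class compatible with pycocotools.coco.COCO).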
def det_process(opts, frame_recv, det_res_send, w_img, h_img):
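    """Detection loop for a child process.

    Messages on frame_recv: a list of image paths preloads a new video
    (acknowledged with 'ready'); a (frame_index, send_time) tuple runs the
    detector on that frame; None exits. Results, the frame transfer time,
    and a completion timestamp are sent back on det_res_send, and any
    exception is forwarded to the parent with its traceback.
    """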
    try:
        model = init_detector(opts)

        # warm up the GPU
        _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
        torch.cuda.synchronize()

        while True:
            fidx = frame_recv.recv()
            if isinstance(fidx, list):
                # new video, read all images in advance
                frame_list = fidx
                frames = [imread(img_path) for img_path in frame_list]
                # signal ready, no errors
                det_res_send.send('ready')
                continue
            elif fidx is None:
                # exit flag
                break
            fidx, t1 = fidx
            img = frames[fidx]
            t2 = perf_counter()
            t_send_frame = t2 - t1

            result = inference_detector(model, img, gpu_pre=not opts.cpu_pre)
            torch.cuda.synchronize()

            t3 = perf_counter()
            det_res_send.send([result, t_send_frame, t3])

    except Exception:
        # report all errors from the child process to the parent
        # forward traceback info as well
        det_res_send.send(Exception("".join(traceback.format_exception(*sys.exc_info()))))
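
A minimal parent-side sketch (not part of the source) of the pipe protocol that det_process expects; opts, w_img, h_img, and img_paths are assumed to be defined by the caller:

from multiprocessing import Pipe, Process
from time import perf_counter

frame_recv, frame_send = Pipe(False)      # parent -> child (recv end, send end)
det_res_recv, det_res_send = Pipe(False)  # child -> parent

p = Process(target=det_process,
            args=(opts, frame_recv, det_res_send, w_img, h_img))
p.start()

frame_send.send(img_paths)                # a list preloads a new video
assert det_res_recv.recv() == 'ready'     # child finished preloading
frame_send.send((0, perf_counter()))      # request detection on frame 0
result, t_send_frame, t3 = det_res_recv.recv()
frame_send.send(None)                     # None is the exit flag
p.join()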
Example #2
def det_test(opts, scale, imgs):
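    """Measure the mean per-image inference time (in seconds) at a given
    input scale, after a GPU warm-up pass."""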
    assert len(imgs)
    opts.in_scale = scale
    opts.no_mask = True
    model = init_detector(opts)

    # warm up the GPU
    _ = inference_detector(model, imgs[0])
    torch.cuda.synchronize()

    runtime = []
    for img in imgs:
        t1 = perf_counter()
        _ = inference_detector(model, img, gpu_pre=not opts.cpu_pre)
        torch.cuda.synchronize()
        t2 = perf_counter()
        runtime.append(t2 - t1)

    return sum(runtime) / len(runtime)
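
A hypothetical driver (not from the source) that uses det_test to compare mean runtime across input scales; opts and sample_imgs are assumed to exist:

for scale in (0.25, 0.5, 0.75, 1.0):
    mean_rt = det_test(opts, scale, sample_imgs)
    print(f'scale {scale}: {1e3*mean_rt:.1f} ms/frame')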
Example #3
def main():
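    """Offline detection: run the detector on every image in the dataset,
    optionally visualize, and dump raw and COCO-format results, plus an
    optional evaluation summary."""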
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get(
        'coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)
    results_raw = []  # image based, all 80 COCO classes
    results_ccf = []  # instance based

    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']

        sid = img['sid']
        seq_name = seqs[sid]

        img_path = join(opts.data_root, seq_dirs[sid], img_name)
        I = imread(img_path)

        result = inference_detector(model, I, gpu_pre=not opts.cpu_pre)
        results_raw.append(result)
        bboxes, scores, labels, masks = \
            parse_det_result(result, coco_mapping, n_class)

        if vis_out:
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            if opts.overwrite or not isfile(vis_path):
                vis_det(I,
                        bboxes,
                        labels,
                        class_names,
                        masks,
                        scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path)

        # convert to coco fmt
        n = len(bboxes)
        if n:
            ltrb2ltwh_(bboxes)

        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes[i],
                'score': scores[i],
                'category_id': labels[i],
            }
            if masks is not None:
                result_dict['segmentation'] = masks[i]
            results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'results_raw.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_raw, open(out_path, 'wb'))

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
Example #4
def main():
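    """Real-time streaming detection: frames become available at opts.fps,
    each iteration processes the latest available frame, and results are
    recorded with wall-clock timestamps and per-frame runtimes."""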
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing

    opts = parse_args()
    assert not (opts.mask_timing and opts.no_mask)  # mask timing requires mask output

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        
        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame
        
        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if not opts.dynamic_schedule:
            stride_cnt = 0
        
        t_total = n_frame/opts.fps
        t_start = perf_counter()
        while True:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continuous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continuous))
            if fidx == last_fidx:
                continue
            
            last_fidx = fidx
            if opts.dynamic_schedule:
                fidx_remainder = fidx_continuous - fidx
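                # dynamic schedule: if more than half of the frame interval
                # has already passed, skip this frame and wait for a fresher one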
                if fidx_remainder > 0.5:
                    continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            frame = frames[fidx]
            result = inference_detector(model, frame, gpu_pre=not opts.cpu_pre, decode_mask=not opts.mask_timing)
            if opts.mask_timing:
                mask_encoded = result[2]
                result = result[:2]
                bboxes, scores, labels, _, sel = \
                    parse_det_result(result, coco_mapping, n_class, return_sel=True)
            else:
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            torch.cuda.synchronize()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break
            if opts.mask_timing:
                masks = decode_mask(mask_encoded, sel)

            timestamps.append(t_elapsed)
            results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump({
                'results_raw': results_raw,
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_raw)

    runtime_all_np = np.asarray(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
Example #5
def main():
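    """Simulated streaming detection: per-frame runtimes are drawn from a
    fitted distribution (and results may come from a cache), so timing is
    reproducible given a seed instead of being measured on the wall clock."""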
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))
        
        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []
        
        t_total = n_frame/opts.fps
        t_elapsed = 0
        if opts.dynamic_schedule:
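            # mean runtime expressed in units of the frame interval (1/fps)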
            mean_rtf = runtime_dist.mean()*opts.fps
        else:
            stride_cnt = 0

        while True:
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continuous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continuous))
            if fidx == last_fidx:
                # algorithm is fast and has some idle time
                fidx += 1
                if fidx == n_frame:
                    break
                t_elapsed = fidx/opts.fps
                
            last_fidx = fidx

            if opts.dynamic_schedule:
                if mean_rtf > 1:
                    # when runtime <= 1, it should always process every frame
                    fidx_remainder = fidx_continuous - fidx
                    if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                        # wait till next frame
                        continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            if opts.cached_res:
                img = frame_list[fidx]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[fidx]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_elapsed += rt_this
            if t_elapsed >= t_total:
                break
            
            timestamps.append(t_elapsed)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(rt_this)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
Example #6
def main():
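    """Forecasting evaluation: feed the current frame together with the
    previous n_history frames to the model and score the prediction against
    a target frame n_future steps ahead."""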
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get(
        'coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)
    if opts.weights_base is not None:
        # for distillation purpose
        load_checkpoint(model, opts.weights_base)
    if opts.cpu_pre:
        img_transform = ImageTransform(
            size_divisor=model.cfg.data.test.size_divisor,
            **model.cfg.img_norm_cfg)
    else:
        img_transform = ImageTransformGPU(
            size_divisor=model.cfg.data.test.size_divisor,
            **model.cfg.img_norm_cfg)
    device = next(model.parameters()).device  # model device
    n_history = model.cfg.data.train.n_history if opts.n_history is None else opts.n_history
    n_future = model.cfg.data.train.n_future if opts.n_future is None else opts.n_future

    results_ccf = []  # instance based
    runtime_all = []
    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))

        with torch.no_grad():
            preprocessed = []
            for i in range(n_history):
                data = _prepare_data(frames[i], img_transform, model.cfg,
                                     device)
                preprocessed.append(data)
            for ii in range(n_history, n_frame - n_future):
                # target frame
                iid = frame_list[ii + n_future]['id']
                img_name = frame_list[ii + n_future]['name']
                I = frames[ii + n_future]

                t_start = perf_counter()
                # input frame
                data = _prepare_data(frames[ii], img_transform, model.cfg,
                                     device)
                preprocessed.append(data)
                imgs = [d['img'][0] for d in preprocessed]
                imgs = torch.cat(imgs, 0)
                imgs = imgs.unsqueeze(0)
                data_merge = {
                    'img': [imgs],
                    'img_meta': data['img_meta'],
                }
                result = model(return_loss=False,
                               rescale=True,
                               numpy_res=True,
                               **data_merge)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

                # slide the window: drop the oldest preprocessed frame
                del preprocessed[0]
                t_end = perf_counter()
                runtime_all.append(t_end - t_start)

                if vis_out:
                    vis_path = join(opts.vis_dir, seq, img_name[:-3] + 'jpg')
                    if opts.overwrite or not isfile(vis_path):
                        vis_det(I,
                                bboxes,
                                labels,
                                class_names,
                                masks,
                                scores,
                                out_scale=opts.vis_scale,
                                out_file=vis_path)

                # convert to coco fmt
                n = len(bboxes)
                if n:
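                    # in-place ltrb -> ltwh conversion for the COCO format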
                    bboxes[:, 2:] -= bboxes[:, :2]

                for i in range(n):
                    result_dict = {
                        'image_id': iid,
                        'bbox': bboxes[i],
                        'score': scores[i],
                        'category_id': labels[i],
                    }
                    if masks is not None:
                        result_dict['segmentation'] = masks[i]
                    results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_total': len(runtime_all),
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print_stats(runtime_all, 'Runtime (ms)', cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
Example #7
def main():
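    """Real-time streaming detection with cProfile instrumentation around
    the inference calls."""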
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    # profile the inference calls with cProfile
    import cProfile
    pr = cProfile.Profile()

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame

        timestamps = []
        results = []
        input_fidx = []
        runtime = []
        last_fidx = None

        t_total = n_frame / opts.fps
        t_start = perf_counter()
        while True:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            # t_elapsed / t_total * n_frame = t_elapsed * opts.fps
            fidx = int(np.floor(t_elapsed * opts.fps))
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            frame = frames[fidx]

            pr.enable()
            result = inference_detector(model, frame)
            torch.cuda.synchronize()
            pr.disable()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break

            timestamps.append(t_elapsed)
            results.append(result)
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump((
                results,
                timestamps,
                input_fidx,
                runtime,
            ), open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results)

    pr.dump_stats('_par_/mrcnn50_s0.5_1080ti_gpupre_blocking.prof')

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(
            {
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, open(out_path, 'wb'))
    # convert to ms for display
    runtime_all_np *= 1e3

    print(f'{n_processed}/{n_total} frames processed')
    print('Runtime (ms): mean: %g; std: %g; min: %g; max: %g' % (
        runtime_all_np.mean(),
        runtime_all_np.std(ddof=1),
        runtime_all_np.min(),
        runtime_all_np.max(),
    ))
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
Example #8
def main():
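    """Offline simulation: process every frame, draw its runtime from a
    fitted distribution, and timestamp each result as if frames arrived at
    opts.fps and were processed in parallel."""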
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        for ii in range(n_frame):
            t = ii / opts.fps

            if opts.cached_res:
                img = frame_list[ii]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[ii]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_finishing = t + rt_this

            timestamps.append(t_finishing)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(ii)
            runtime.append(rt_this)

        # execution is simulated as parallel, so completion order is not
        # guaranteed; sort everything by finish time
        idx = np.argsort(timestamps)
        timestamps = [timestamps[i] for i in idx]
        if results_raw is not None:
            results_raw = [results_raw[i] for i in idx]
        results_parsed = [results_parsed[i] for i in idx]
        input_fidx = [input_fidx[i] for i in idx]
        runtime = [runtime[i] for i in idx]

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(
            {
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')