Example #1
def det_process(opts, frame_recv, det_res_send, w_img, h_img):
    try:
        model = init_detector(opts)

        # warm up the GPU
        _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
        torch.cuda.synchronize()

        while 1:
            fidx = frame_recv.recv()
            if type(fidx) is list:
                # new video, read all images in advance
                frame_list = fidx
                frames = [imread(img_path) for img_path in frame_list]
                # signal ready, no errors
                det_res_send.send('ready')
                continue
            elif fidx is None:
                # exit flag
                break
            fidx, t1 = fidx
            img = frames[fidx]
            t2 = perf_counter() 
            t_send_frame = t2 - t1

            result = inference_detector(model, img, gpu_pre=not opts.cpu_pre)
            torch.cuda.synchronize()

            t3 = perf_counter()
            det_res_send.send([result, t_send_frame, t3])

    except Exception:
        # report all errors from the child process to the parent
        # forward traceback info as well
        det_res_send.send(Exception("".join(traceback.format_exception(*sys.exc_info()))))
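
A minimal, self-contained sketch of the parent side of this pipe protocol (stub names, not from the original project) could look like:

import multiprocessing as mp
from time import perf_counter

def stub_worker(frame_recv, det_res_send):
    # stands in for det_process: same message protocol, no real detector
    frames = []
    while True:
        msg = frame_recv.recv()
        if isinstance(msg, list):   # new video: a list of frame paths
            frames = msg
            det_res_send.send('ready')
            continue
        if msg is None:             # exit flag
            break
        fidx, t1 = msg              # (frame index, send timestamp)
        det_res_send.send([f'det({frames[fidx]})', perf_counter() - t1, perf_counter()])

if __name__ == '__main__':
    frame_recv, frame_send = mp.Pipe(False)      # worker reads frames here
    det_res_recv, det_res_send = mp.Pipe(False)  # parent reads results here
    p = mp.Process(target=stub_worker, args=(frame_recv, det_res_send))
    p.start()
    frame_send.send(['f0.jpg', 'f1.jpg'])        # announce a new video
    assert det_res_recv.recv() == 'ready'
    frame_send.send((1, perf_counter()))         # request detection on frame 1
    print(det_res_recv.recv())
    frame_send.send(None)                        # shut the worker down
    p.join()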
Example #2
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    print('Reading detection images')
    det_img_files = sorted(glob(join(opts.det_img_dir, '*.jpg')))
    det_imgs = []
    for path in det_img_files:
        det_imgs.append(imread(path))

    print('Detection Test at Scale 0.5')
    rt_det1 = det_test(opts, 0.5, det_imgs)
    print(f'Mean runtime (ms): {1e3*rt_det1:.3g}')

    print('Detection Test at Scale 1')
    rt_det2 = det_test(opts, 1, det_imgs)
    print(f'Mean runtime (ms): {1e3*rt_det2:.3g}')
    det_imgs = None  # release the images before the classification test

    print('Classification Test')
    rt_inf, rt_train = classify_test(opts)
    print(f'Inference Mean runtime (ms): {1e3*rt_inf:.3g}')
    print(f'Training Mean runtime (ms): {1e3*rt_train:.3g}')

    out_path = join(opts.out_dir, 'platform_bench.txt')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'w', newline='') as f:  # newline='' lets csv control line endings
            writer = csv.writer(f)
            writer.writerow([rt_det1, rt_det2, rt_inf, rt_train])
        print(f'Saved results to "{out_path}"')
Example #3
def main():
    opts = parse_args()
    mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    seq = 'f1008c18-e76e-3c24-adcc-da9858fac145'
    sid = seqs.index(seq)

    frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
    results = pickle.load(open(join(opts.result_dir, seq + '.pkl'), 'rb'))
    results_parsed = results['results_parsed']

    img = frame_list[153]
    result = results_parsed[15]
    bboxes, scores, labels, masks = result[:4]
    idx = [16]
    bboxes = bboxes[idx]
    scores = scores[idx]
    labels = labels[idx]
    masks = masks[idx]

    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
    I = imread(img_path).copy()
    vis_path = join(opts.vis_dir, 'teaser.jpg')
    if opts.overwrite or not isfile(vis_path):
        vis_det(
            I,
            bboxes,
            labels,
            class_names,
            masks,
            None,
            out_scale=opts.vis_scale,
            out_file=vis_path,
        )
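
A note on the idx = [16] pattern above: indexing a NumPy array with a list keeps the leading dimension, so the selected detection still behaves as a batch of one for vis_det. A quick illustration:

import numpy as np

bboxes = np.zeros((20, 4))
print(bboxes[16].shape)    # (4,)   a single box
print(bboxes[[16]].shape)  # (1, 4) a batch containing one box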
Example #4
def main():
    opts = parse_args()

    out_dir = mkdir2(opts.out_dir) if opts.out_dir else opts.result_dir
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get(
        'coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    mismatch = 0

    print('Pairing the output with the ground truth')

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        results = pickle.load(open(join(opts.result_dir, seq + '.pkl'), 'rb'))
        # use raw results when possible in case we change class subset during evaluation
        if opts.use_parsed:
            results_parsed = results['results_parsed']
        else:
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                # fall back to parsed results if raw ones were not saved
                results_parsed = results['results_parsed']
        timestamps = results['timestamps']
        input_fidx = results['input_fidx']

        tidx_p1 = 0
        for ii, img in enumerate(frame_list):
            # pred, gt association by time
            t = (ii - opts.eta) / opts.fps
            while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
                tidx_p1 += 1
            if tidx_p1 == 0:
                # no output
                miss += 1
                bboxes, scores, labels = [], [], []
                masks, tracks = None, None
            else:
                tidx = tidx_p1 - 1
                ifidx = input_fidx[tidx]
                in_time += int(ii == ifidx)
                mismatch += ii - ifidx

                if opts.use_parsed or results_raw is None:
                    result = results_parsed[tidx]
                    bboxes, scores, labels, masks = result[:4]
                    if len(result) > 4:
                        tracks = result[4]
                    else:
                        tracks = None
                else:
                    result = results_raw[tidx]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
                    tracks = None

            if vis_out:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                I = imread(img_path)
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')
                if opts.overwrite or not isfile(vis_path):
                    if tracks is None:
                        vis_det(
                            I,
                            bboxes,
                            labels,
                            class_names,
                            masks,
                            scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )
                    else:
                        vis_track(
                            I,
                            bboxes,
                            tracks,
                            labels,
                            class_names,
                            masks,
                            scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

            # convert to coco fmt
            n = len(bboxes)
            if n:
                bboxes_ltwh = ltrb2ltwh(bboxes)

            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes_ltwh[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                if masks is not None:
                    result_dict['segmentation'] = masks[i]

                results_ccf.append(result_dict)

    out_path = join(out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    out_path = join(out_dir, 'eval_assoc.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'miss': miss,
            'in_time': in_time,
            'mismatch': mismatch,
        }, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
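
The pairing rule in the loop above (take the newest output whose timestamp is at most the current frame time, else count a miss) is equivalent to a bisection; a minimal sketch with an assumed helper name:

from bisect import bisect_right

def latest_output_idx(timestamps, t):
    # index of the newest timestamp <= t, or None if no output exists yet
    i = bisect_right(timestamps, t)
    return None if i == 0 else i - 1

print(latest_output_idx([0.1, 0.25, 0.4], 0.30))  # 1
print(latest_output_idx([0.1, 0.25, 0.4], 0.05))  # None -> a "miss"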
Example #5
class_names = [c['name'] for c in cats]

last_sid = None
aidx = 0
for img in tqdm(imgs):
    img_name = img['name']
    iid = img['id']
    sid = img['sid']

    if sid != last_sid:
        in_dir_seq = join(data_root, seq_dirs[sid])
        out_dir_seq = join(out_dir, seqs[sid])
        last_sid = sid

    I = imread(join(in_dir_seq, img['name']))

    bboxes = []
    masks = []
    labels = []
    if show_track:
        tracks = []

    while aidx < len(annots):
        ann = annots[aidx]
        if ann['image_id'] != iid:
            break
        bboxes.append(ann['bbox'])
        if 'segmentation' in ann:
            masks.append(ann['segmentation'])
        else:
            masks.append(None)  # assumption: keep masks aligned when no segmentation is given
Example #6
def main():
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get(
        'coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)
    results_raw = []  # image based, all 80 COCO classes
    results_ccf = []  # instance based

    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']

        sid = img['sid']
        seq_name = seqs[sid]

        img_path = join(opts.data_root, seq_dirs[sid], img_name)
        I = imread(img_path)

        result = inference_detector(model, I, gpu_pre=not opts.cpu_pre)
        results_raw.append(result)
        bboxes, scores, labels, masks = \
            parse_det_result(result, coco_mapping, n_class)

        if vis_out:
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            if opts.overwrite or not isfile(vis_path):
                vis_det(I,
                        bboxes,
                        labels,
                        class_names,
                        masks,
                        scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path)

        # convert to coco fmt
        n = len(bboxes)
        if n:
            ltrb2ltwh_(bboxes)

        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes[i],
                'score': scores[i],
                'category_id': labels[i],
            }
            if masks is not None:
                result_dict['segmentation'] = masks[i]
            results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'results_raw.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_raw, open(out_path, 'wb'))

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
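
The ltrb2ltwh_ call above converts detector boxes [left, top, right, bottom] to COCO's [left, top, width, height] in place; a sketch of what such a helper presumably does (the real one lives in the project's utilities):

import numpy as np

def ltrb2ltwh_(bboxes):
    bboxes[:, 2:] -= bboxes[:, :2]  # (right, bottom) -> (width, height)
    return bboxes

print(ltrb2ltwh_(np.array([[10., 20., 50., 80.]])))  # [[10. 20. 40. 60.]]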
Example #7
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing

    opts = parse_args()
    assert (not opts.mask_timing) or (opts.mask_timing and not opts.no_mask)

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        
        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame
        
        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if not opts.dynamic_schedule:
            stride_cnt = 0
        
        t_total = n_frame/opts.fps
        t_start = perf_counter()
        while 1:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continuous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continuous))
            if fidx == last_fidx:
                continue
            
            last_fidx = fidx
            if opts.dynamic_schedule:
                fidx_remainder = fidx_continuous - fidx
                if fidx_remainder > 0.5:
                    continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            frame = frames[fidx]
            result = inference_detector(model, frame, gpu_pre=not opts.cpu_pre, decode_mask=not opts.mask_timing)
            if opts.mask_timing:
                mask_encoded = result[2]
                result = result[:2]
                bboxes, scores, labels, _, sel = \
                    parse_det_result(result, coco_mapping, n_class, return_sel=True)
            else:
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            torch.cuda.synchronize()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break
            if opts.mask_timing:
                masks = decode_mask(mask_encoded, sel)

            timestamps.append(t_elapsed)
            results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump({
                'results_raw': results_raw,
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_raw)

    runtime_all_np = np.asarray(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
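
The streaming loop above never runs on stale input: at any wall-clock offset, the only frame worth processing is the latest one that has already arrived, floor(t_elapsed * fps). A minimal sketch of that policy:

import numpy as np

def latest_frame(t_elapsed, fps, n_frame):
    # newest frame index available t_elapsed seconds into the stream
    return min(int(np.floor(t_elapsed * fps)), n_frame - 1)

print(latest_frame(0.37, 30, 600))  # 11: frames 0..11 have arrived, run on 11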
Example #8
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    # named runtime_data to avoid being shadowed by the per-sequence runtime list below
    runtime_data = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime_data, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))
        
        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []
        
        t_total = n_frame/opts.fps
        t_elapsed = 0
        if opts.dynamic_schedule:
            mean_rtf = runtime_dist.mean()*opts.fps
        else:
            stride_cnt = 0

        while 1:
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continuous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continuous))
            if fidx == last_fidx:
                # algorithm is fast and has some idle time
                fidx += 1
                if fidx == n_frame:
                    break
                t_elapsed = fidx/opts.fps
                fidx_continuous = fidx  # we jumped exactly to a frame boundary
                
            last_fidx = fidx

            if opts.dynamic_schedule:
                if mean_rtf > 1:
                    # when runtime <= 1, it should always process every frame
                    fidx_remainder = fidx_continuous - fidx
                    if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                        # wait till next frame
                        continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            if opts.cached_res:
                img = frame_list[fidx]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[fidx]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_elapsed += rt_this
            if t_elapsed >= t_total:
                break
            
            timestamps.append(t_elapsed)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(rt_this)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))  

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
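
The dynamic-schedule test above encodes a wait-or-run decision: with an expected runtime of mean_rtf frame intervals, starting now finishes at fractional frame offset fidx_remainder + mean_rtf, and if waiting for the next frame boundary would finish within the same interval, it is better to wait. A sketch of that rule in isolation:

import numpy as np

def should_wait(fidx_remainder, mean_rtf):
    if mean_rtf <= 1:
        return False  # faster than the frame rate: always process
    return mean_rtf < np.floor(fidx_remainder + mean_rtf)

print(should_wait(0.9, 1.2))  # True: the next frame arrives almost immediately
print(should_wait(0.1, 1.2))  # False: run on the current frame now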
Example #9
def vis_obj_fancy(
    img,
    bboxes,
    labels,
    class_names,
    color_palette,
    color_scheme='class',
    masks=None,
    scores=None,
    score_th=0.3,
    tracks=None,
    iscrowd=None,
    out_scale=1,
    out_file=None,
    show_label=True,
    show_score=True,
    font=None,
):
    thickness = 2
    alpha = 0.2

    if isinstance(img, str):
        img = imread(img)
        img = np.array(img)  # create a writable copy
    if out_scale != 1:
        img = cv2.resize(
            img,
            None,  # dsize=None: output size computed from fx/fy
            fx=out_scale,
            fy=out_scale,
            interpolation=cv2.INTER_LINEAR,
        )
    bboxes = np.asarray(bboxes)
    labels = np.asarray(labels)
    if masks is not None:
        masks = np.asarray(masks)

    empty = len(bboxes) == 0
    if not empty and scores is not None and score_th > 0:
        sel = scores >= score_th
        bboxes = bboxes[sel]
        labels = labels[sel]
        scores = scores[sel]  # keep scores aligned with the filtered boxes
        if masks is not None:
            masks = masks[sel]
        empty = len(bboxes) == 0

    if empty:
        if out_file is not None:
            imwrite(img, out_file)
        return img

    if out_scale != 1:
        bboxes = out_scale * bboxes

    bboxes = bboxes.round().astype(np.int32)

    # draw masks or filled rectangles
    if masks is None:
        img_orig = img.copy()  # keep an unmodified copy for alpha blending
        for i, (bbox, label) in enumerate(zip(bboxes, labels)):
            if color_scheme == 'class':
                color = color_palette[label]
            cv2.rectangle(
                img,
                (bbox[0], bbox[1]),
                (bbox[2], bbox[3]),
                color,
                thickness=-1,
            )
        img = cv2.addWeighted(img_orig, 1 - alpha, img, alpha, 0)
        # draw box contours
        for i, (bbox, label) in enumerate(zip(bboxes, labels)):
            if color_scheme == 'class':
                color = color_palette[label]
            cv2.rectangle(
                img,
                (bbox[0], bbox[1]),
                (bbox[2], bbox[3]),
                color,
                thickness=thickness,
            )
    else:
        for i, (mask, label) in enumerate(zip(masks, labels)):
            if color_scheme == 'class':
                color = np.array(color_palette[label])
            m = maskUtils.decode(mask)
            if out_scale != 1:
                m = cv2.resize(
                    m.astype(np.uint8),
                    None,  # dsize=None: output size computed from fx/fy
                    fx=out_scale,
                    fy=out_scale,
                    interpolation=cv2.INTER_NEAREST,
                )
            m = m.astype(bool)  # np.bool was removed in recent NumPy
            img[m] = (1 - alpha) * img[m] + alpha * color
            b = find_boundaries(m)
            img[b] = color

    # put label text
    if show_label or show_score:
        if font is not None:
            # using TrueType supported in PIL
            img = Image.fromarray(img)
            draw = ImageDraw.Draw(img)

        for i, (bbox, label) in enumerate(zip(bboxes, labels)):
            if color_scheme == 'class':
                color = color_palette[label]

            lt = (bbox[0], bbox[1])

            if show_label:
                if class_names is None:
                    label_text = f'class {label}'
                else:
                    label_text = class_names[label]
            else:
                label_text = ''
            if show_score and scores is not None:
                score_text = f'{scores[i]:.02f}'
            else:
                score_text = ''
            if label_text and score_text:
                text = label_text + ' - ' + score_text
            elif label_text:
                text = label_text
            elif score_text:
                text = score_text
            else:
                text = ''

            if font is None:
                cv2.putText(
                    img,
                    text,
                    (lt[0], lt[1] - 2),
                    cv2.FONT_HERSHEY_DUPLEX,
                    1.7,
                    color,
                    thickness=1,
                )
            else:
                draw.text(
                    (lt[0], lt[1] - font.size),
                    text,
                    (*color, 1),  # RGBA
                    font=font,
                )
        if font is not None:
            img = np.asarray(img)

    if out_file is not None:
        imwrite(img, out_file)
    return img
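
The translucent fill above relies on cv2.addWeighted computing dst = src1*w1 + src2*w2 + gamma elementwise: blending the untouched copy against the rectangle-filled image leaves the boxes at opacity alpha. A quick check:

import cv2
import numpy as np

img = np.zeros((100, 100, 3), np.uint8)
filled = img.copy()
cv2.rectangle(filled, (10, 10), (60, 60), (0, 255, 0), thickness=-1)
alpha = 0.2
blended = cv2.addWeighted(img, 1 - alpha, filled, alpha, 0)
print(blended[30, 30])  # [ 0 51  0]: a 20% green fill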
Example #10
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    box_ccf = pickle.load(open(opts.box_ccf_path, 'rb'))
    mask_ccf = pickle.load(open(opts.mask_ccf_path, 'rb'))
    box_end_idx = 0
    mask_end_idx = 0

    results_ccf = []        # instance based

    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']

        sid = img['sid']
        seq_name = seqs[sid]
        
        box_start_idx = box_end_idx
        while box_start_idx < len(box_ccf) and box_ccf[box_start_idx]['image_id'] < img['id']:
            box_start_idx += 1
        box_end_idx = box_start_idx
        while box_end_idx < len(box_ccf) and box_ccf[box_end_idx]['image_id'] == img['id']:
            box_end_idx += 1
        box_dets = box_ccf[box_start_idx:box_end_idx]
        
        mask_start_idx = mask_end_idx
        while mask_start_idx < len(mask_ccf) and mask_ccf[mask_start_idx]['image_id'] < img['id']:
            mask_start_idx += 1
        mask_end_idx = mask_start_idx
        while mask_end_idx < len(mask_ccf) and mask_ccf[mask_end_idx]['image_id'] == img['id']:
            mask_end_idx += 1
        mask_dets = mask_ccf[mask_start_idx:mask_end_idx]

        if len(box_dets) == 0:
            bboxes1, scores1, labels1, masks1 = [], [], [], None
        elif len(mask_dets) == 0:
            bboxes1 = np.array([d['bbox'] for d in box_dets])
            scores1 = np.array([d['score'] for d in box_dets])
            labels1 = np.array([d['category_id'] for d in box_dets])
            masks1 = np.array([d['segmentation'] for d in box_dets])
        else:
            # the slow version, but works with out of order ccf results
            # dets = [r for r in box_ccf if r['image_id'] == img['id']]
            
            bboxes1 = np.array([d['bbox'] for d in box_dets])
            scores1 = np.array([d['score'] for d in box_dets])
            labels1 = np.array([d['category_id'] for d in box_dets])
            masks1 = np.array([d['segmentation'] for d in box_dets])
            
            bboxes2 = np.array([d['bbox'] for d in mask_dets])
            scores2 = np.array([d['score'] for d in mask_dets])
            labels2 = np.array([d['category_id'] for d in mask_dets])
            masks2 = np.array([d['segmentation'] for d in mask_dets])

            score_argsort = np.argsort(scores1)[::-1]
            bboxes1 = bboxes1[score_argsort]
            scores1 = scores1[score_argsort]
            labels1 = labels1[score_argsort]
            masks1 = masks1[score_argsort]
        
            score_argsort = np.argsort(scores2)[::-1]
            bboxes2 = bboxes2[score_argsort]
            scores2 = scores2[score_argsort]
            labels2 = labels2[score_argsort]
            masks2 = masks2[score_argsort]

            order1, order2, n_matched12, _, _ = iou_assoc(
                bboxes1, labels1, np.arange(len(bboxes1)), 0,
                bboxes2, labels2, opts.match_iou_th,
                no_unmatched1=False,
            )

            bboxes1 = bboxes1[order1]
            scores1 = scores1[order1]
            labels1 = labels1[order1]
            masks1 = masks1[order1]

            bboxes2 = bboxes2[order2]
            scores2 = scores2[order2]
            labels2 = labels2[order2]
            masks2 = masks2[order2]

            mask1_fix = warp_mask_to_box(
                masks2[:n_matched12],
                bboxes2[:n_matched12],
                bboxes1[:n_matched12]
            )

            masks1 = np.concatenate((mask1_fix, masks1[n_matched12:]))

        if vis_out:
            img_path = join(opts.data_root, seq_dirs[sid], img_name)
            I = imread(img_path)
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            bboxes = ltwh2ltrb(bboxes1) if len(bboxes1) else []
            if opts.overwrite or not isfile(vis_path):
                vis_det(
                    I, bboxes, labels1,
                    class_names, masks1, scores1,
                    out_scale=opts.vis_scale,
                    out_file=vis_path
                )

        n = len(bboxes1)
        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes1[i],
                'score': scores1[i],
                'category_id': labels1[i],
            }
            if masks1 is not None:
                result_dict['segmentation'] = masks1[i]
            results_ccf.append(result_dict)


    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
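
The two-pointer scans above exploit the fact that the cached ccf lists are sorted by image_id: the contiguous run for one image is sliced out, and the next image's scan resumes where that run ended instead of at index 0. Isolated as a hypothetical helper:

def dets_for_image(ccf, iid, start):
    # ccf is assumed sorted by 'image_id'
    while start < len(ccf) and ccf[start]['image_id'] < iid:
        start += 1
    end = start
    while end < len(ccf) and ccf[end]['image_id'] == iid:
        end += 1
    return ccf[start:end], end

ccf = [{'image_id': 1}, {'image_id': 1}, {'image_id': 3}]
dets, nxt = dets_for_image(ccf, 1, 0)
print(len(dets), nxt)  # 2 2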
Example #11
def main():
    opts = parse_args()

    # caches = [
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/retina50_s0.2/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.5/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.75/val/results_ccf.pkl'),
    # ]

    # # should be sorted
    # rtfs = [1, 2, 3]

    # caches = [
    #     join(opts.exp_dir, 'ArgoVerse1.1/output/mrcnn50_nm_th0_s0.75/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1/output/mrcnn50_nm_th0_s1.0/val/results_ccf.pkl'),
    # ]

    # # should be sorted
    # rtfs = [3, 5]

    caches = [
        join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.75/val/results_ccf.pkl'),
        join(opts.exp_dir, 'ArgoVerse1.1/output/cmrcnn101_nm_s1.0/val/results_ccf.pkl'),
    ]

    # should be sorted
    rtfs = [3, 5]

    n_method = len(caches)
    max_history = max(rtfs)

    cache_ccfs = [pickle.load(open(path, 'rb')) for path in caches]
    cache_end_idx = n_method*[0]

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]

    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        
        w_img, h_img = db.imgs[0]['width'], db.imgs[0]['height']
        bf = BboxFilterEx(
            w_img, h_img,
            forecast=opts.forecast,
            forecast_before_assoc=opts.forecast_before_assoc,
        )

        # backward in time
        # cur, cur - 1, ..., cur - max_history
        result_buffer = (max_history + 1)*[None]
        result_buffer_marker = (max_history + 1)*[None]
        min_rtf = min(rtfs)

        for ii, img in enumerate(frame_list):
            # fetch results
            for i in range(n_method):
                ifidx = ii - rtfs[i]
                if ifidx < 0:
                    break
                cache_end_idx[i], bboxes, scores, labels = \
                    result_from_ccf(cache_ccfs[i], frame_list[ifidx]['id'], cache_end_idx[i], mask=False)
                result_buffer[rtfs[i]] = (bboxes, scores, labels)
                result_buffer_marker[rtfs[i]] = i == 0
            if ii < min_rtf:
                # no method has finished yet
                bboxes, scores, labels, tracks = [], [], [], []
            else:
                s = max_history
                while s >= 0 and result_buffer[s] is None:
                    s -= 1
                # if first result is one step ahead
                t = ii - s
                birth_tracks = True
                kill_tracks = True
                if s == max_history:
                    bf.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)
                    bf_this = deepcopy(bf)
                else:
                    bf_this = deepcopy(bf)
                    bf_this.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)

                while 1:
                    # find next non-empty result
                    s -= 1
                    while s >= 0 and result_buffer[s] is None:
                        s -= 1
                    if s < 0:
                        break
                    if result_buffer_marker[s]:
                        birth_tracks = opts.fast_birth_tracks
                        kill_tracks = opts.fast_kill_tracks
                    else:
                        birth_tracks = True
                        kill_tracks = True

                    t = ii - s
                    bf_this.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)
                
                bboxes, scores, labels, _, tracks = \
                    bf_this.predict(ii)

                # shift result buffer
                result_buffer.pop(max_history)
                result_buffer.insert(0, None)
                result_buffer_marker.pop(max_history)
                result_buffer_marker.insert(0, None)


            n = len(bboxes)
            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                # if masks is not None:
                #     result_dict['segmentation'] = masks[i]
                results_ccf.append(result_dict)

            if vis_out:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                I = imread(img_path)
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')

                bboxes_ltrb = ltwh2ltrb(bboxes) if n else []
                if opts.overwrite or not isfile(vis_path):
                    vis_track(
                        I, bboxes_ltrb, tracks, labels,
                        class_names, None, scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path
                    ) 

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
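
The result buffer above is a fixed-size history: slot d holds the output computed from the frame d steps in the past, and advancing one frame pops the oldest slot and prepends an empty one. A toy illustration of the shift:

max_history = 3
buf = (max_history + 1) * [None]
buf[2] = 'output from 2 frames ago'
buf.pop(max_history)  # drop the slot falling out of the history window
buf.insert(0, None)   # the new current frame has no output yet
print(buf.index('output from 2 frames ago'))  # 3: the output aged one frame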
Example #12
def main():
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get(
        'coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)
    if opts.weights_base is not None:
        # for distillation purpose
        load_checkpoint(model, opts.weights_base)
    if opts.cpu_pre:
        img_transform = ImageTransform(
            size_divisor=model.cfg.data.test.size_divisor,
            **model.cfg.img_norm_cfg)
    else:
        img_transform = ImageTransformGPU(
            size_divisor=model.cfg.data.test.size_divisor,
            **model.cfg.img_norm_cfg)
    device = next(model.parameters()).device  # model device
    n_history = model.cfg.data.train.n_history if opts.n_history is None else opts.n_history
    n_future = model.cfg.data.train.n_future if opts.n_future is None else opts.n_future

    results_ccf = []  # instance based
    runtime_all = []
    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))

        with torch.no_grad():
            preprocessed = []
            for i in range(n_history):
                data = _prepare_data(frames[i], img_transform, model.cfg,
                                     device)
                preprocessed.append(data)
            for ii in range(n_history, n_frame - n_future):
                # target frame
                iid = frame_list[ii + n_future]['id']
                img_name = frame_list[ii + n_future]['name']
                I = frames[ii + n_future]

                t_start = perf_counter()
                # input frame
                data = _prepare_data(frames[ii], img_transform, model.cfg,
                                     device)
                preprocessed.append(data)
                imgs = [d['img'][0] for d in preprocessed]
                imgs = torch.cat(imgs, 0)
                imgs = imgs.unsqueeze(0)
                data_merge = {
                    'img': [imgs],
                    'img_meta': data['img_meta'],
                }
                result = model(return_loss=False,
                               rescale=True,
                               numpy_res=True,
                               **data_merge)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

                # slide the temporal window forward by one frame
                del preprocessed[0]
                t_end = perf_counter()
                runtime_all.append(t_end - t_start)

                if vis_out:
                    vis_path = join(opts.vis_dir, seq, img_name[:-3] + 'jpg')
                    if opts.overwrite or not isfile(vis_path):
                        vis_det(I,
                                bboxes,
                                labels,
                                class_names,
                                masks,
                                scores,
                                out_scale=opts.vis_scale,
                                out_file=vis_path)

                # convert to coco fmt
                n = len(bboxes)
                if n:
                    bboxes[:, 2:] -= bboxes[:, :2]

                for i in range(n):
                    result_dict = {
                        'image_id': iid,
                        'bbox': bboxes[i],
                        'score': scores[i],
                        'category_id': labels[i],
                    }
                    if masks is not None:
                        result_dict['segmentation'] = masks[i]
                    results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_total': len(runtime_all),
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print_stats(runtime_all, 'Runtime (ms)', cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
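
The stacking step in the loop above (torch.cat over the per-frame tensors, then unsqueeze) assembles a temporal clip for the multi-frame model; assuming mmdet-style (1, C, H, W) tensors per frame, the shapes work out as follows:

import torch

frames = [torch.zeros(1, 3, 64, 64) for _ in range(3)]  # T = 3 frames
clip = torch.cat(frames, 0)  # (3, 3, 64, 64): frames stacked along dim 0
clip = clip.unsqueeze(0)     # (1, 3, 3, 64, 64): add back the batch dim
print(clip.shape)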
Example #13
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    assert class_names[5] == 'truck'
    assert class_names[2] == 'car'
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    in_ccf = pickle.load(open(join(opts.in_dir, 'results_ccf.pkl'), 'rb'))
    results_ccf = []

    i_start = 0
    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        for ii, img in enumerate(frame_list):
            iid = img['id']
            i_end = i_start
            while i_end < len(in_ccf) and in_ccf[i_end]['image_id'] == iid:
                i_end += 1
            dets = in_ccf[i_start:i_end]
            i_start = i_end
            # dets = [r for r in in_ccf if r['image_id'] == img['id']]
            bboxes = np.array([d['bbox'] for d in dets])
            labels = np.array([d['category_id'] for d in dets])
            scores = np.array([d['score'] for d in dets])
            with_mask = len(dets) and 'segmentation' in dets[0]
            if with_mask:
                masks = np.array([d['segmentation'] for d in dets])

            score_argsort = np.argsort(scores)[::-1]
            bboxes = bboxes[score_argsort]
            scores = scores[score_argsort]
            labels = labels[score_argsort]
            if with_mask:
                masks = masks[score_argsort]

            remove_idx = remove_car_for_pickup(bboxes, labels, scores)
            bboxes = np.delete(bboxes, remove_idx, 0)
            labels = np.delete(labels, remove_idx, 0)
            scores = np.delete(scores, remove_idx, 0)
            if with_mask:
                masks = np.delete(masks, remove_idx, 0)
            n = len(bboxes)
            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                if with_mask:
                    result_dict['segmentation'] = masks[i]
                results_ccf.append(result_dict)

            if vis_out:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                I = imread(img_path)
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')

                bboxes_ltrb = bboxes.copy()
                if len(bboxes_ltrb):
                    bboxes_ltrb[:, 2:] += bboxes_ltrb[:, :2]
                if opts.overwrite or not isfile(vis_path):
                    vis_det(I,
                            bboxes_ltrb,
                            labels,
                            class_names,
                            masks if with_mask else None,
                            scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path)
    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps 30')
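
The suppression step above relies on np.delete with axis=0 removing whole rows, which keeps the box, score, label (and mask) arrays aligned after filtering:

import numpy as np

bboxes = np.array([[0, 0, 1, 1], [5, 5, 2, 2], [9, 9, 3, 3]])
scores = np.array([0.9, 0.8, 0.3])
remove_idx = [1]  # e.g. a duplicate 'car' overlapping a 'truck'
print(np.delete(bboxes, remove_idx, 0))  # rows 0 and 2 remain
print(np.delete(scores, remove_idx, 0))  # [0.9 0.3]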
Example #14
def main():
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    import cProfile
    pr = cProfile.Profile()

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # load all frames in advance

        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame

        timestamps = []
        results = []
        input_fidx = []
        runtime = []
        last_fidx = None

        t_total = n_frame / opts.fps
        t_start = perf_counter()
        while 1:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx = int(np.floor(t_elapsed * opts.fps))
            #   t_elapsed/t_total *n_frame
            # = t_elapsed*opts.fps
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            frame = frames[fidx]

            pr.enable()
            result = inference_detector(model, frame)
            torch.cuda.synchronize()
            pr.disable()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break

            timestamps.append(t_elapsed)
            results.append(result)
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump((
                results,
                timestamps,
                input_fidx,
                runtime,
            ), open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results)

    pr.dump_stats('_par_/mrcnn50_s0.5_1080ti_gpupre_blocking.prof')
    # pr.dump_stats('debug.prof')

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(
            {
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, open(out_path, 'wb'))
    # convert to ms for display
    runtime_all_np *= 1e3

    print(f'{n_processed}/{n_total} frames processed')
    print('Runtime (ms): mean: %g; std: %g; min: %g; max: %g' % (
        runtime_all_np.mean(),
        runtime_all_np.std(ddof=1),
        runtime_all_np.min(),
        runtime_all_np.max(),
    ))
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
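
Enabling the profiler only around the inference call, as above, keeps image loading and bookkeeping out of the profile; the same selective pattern in miniature:

import cProfile, io, pstats

pr = cProfile.Profile()
for _ in range(3):
    pr.enable()
    sum(i * i for i in range(10000))  # stand-in for inference_detector(...)
    pr.disable()

s = io.StringIO()
pstats.Stats(pr, stream=s).sort_stats('cumulative').print_stats(3)
print(s.getvalue())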
Example #15
def main():
    opts = parse_args()
    assert opts.forecast_before_assoc, "Not implemented"

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    shifts = 0

    given_tracks = opts.assoc == 'given'
    assert not given_tracks, "Not implemented"

    t_assoc = []
    t_forecast = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        # kf_F[3, 7] = 1
        kf_Q = torch.eye(8)
        kf_R = 10 * torch.eye(4)
        kf_P_init = 100 * torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

            results = pickle.load(open(join(opts.in_dir, seq + '.pkl'), 'rb'))
            # use raw results when possible in case we change class subset during evaluation
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
            timestamps = results['timestamps']
            input_fidx = results['input_fidx']

            # t1 -> det1, t2 -> det2, interpolate at t3 (t3 is the current time)
            det_latest_p1 = 0  # latest detection index + 1
            det_t2 = None  # detection index at t2
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            if not given_tracks:
                tkidx = 0  # track starting index

            for ii, img in enumerate(frame_list):
                # pred, gt association by time
                t = (ii - opts.eta) / opts.fps
                while det_latest_p1 < len(
                        timestamps) and timestamps[det_latest_p1] <= t:
                    det_latest_p1 += 1
                if det_latest_p1 == 0:
                    # no detection output
                    miss += 1
                    bboxes_t3, scores, labels, tracks = [], [], [], []
                else:
                    det_latest = det_latest_p1 - 1
                    ifidx = input_fidx[det_latest]
                    in_time += int(ii == ifidx)
                    shifts += ii - ifidx

                    if det_latest != det_t2:
                        # new detection
                        # we can now throw away old result (t1)
                        # the old one is kept for forecasting purpose

                        if len(kf_x) and opts.forecast_before_assoc:
                            dt = ifidx - input_fidx[det_t2]
                            dt = int(dt)  # convert from numpy to basic python int
                            w_img, h_img = img['width'], img['height']

                            kf_F = make_F(kf_F, dt)
                            kf_Q = make_Q(kf_Q, dt)

                            kf_x, kf_P = batch_kf_predict(
                                kf_F, kf_x, kf_P, kf_Q)
                            bboxes_f = x2bbox(kf_x)

                        det_t2 = det_latest
                        if results_raw is None:
                            result_t2 = results_parsed[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = result_t2[:4]
                            if not given_tracks and len(result_t2) > 4:
                                tracks_t2 = np.asarray(result_t2[4])
                            else:
                                tracks_t2 = np.empty((0, ), np.uint32)
                        else:
                            result_t2 = results_raw[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = \
                                parse_det_result(result_t2, coco_mapping, n_class)

                        t1 = perf_counter()
                        n = len(bboxes_t2)
                        if n:
                            if not given_tracks:
                                # put high scores det first for better iou matching
                                score_argsort = np.argsort(scores_t2)[::-1]
                                bboxes_t2 = bboxes_t2[score_argsort]
                                scores_t2 = scores_t2[score_argsort]
                                labels_t2 = labels_t2[score_argsort]

                            ltrb2ltwh_(bboxes_t2)

                            if given_tracks:
                                raise NotImplementedError
                                order1, order2, n_matched12 = track_based_shuffle(
                                    tracks, tracks_t2, no_unmatched1=True)
                                tracks = tracks[order2]
                            else:
                                updated = False
                                if len(kf_x):
                                    order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                                        bboxes_f,
                                        labels,
                                        tracks,
                                        tkidx,
                                        bboxes_t2,
                                        labels_t2,
                                        opts.match_iou_th,
                                        no_unmatched1=True,
                                    )

                                    if n_matched12:
                                        kf_x = kf_x[order1]
                                        kf_P = kf_P[order1]
                                        kf_x, kf_P = batch_kf_update(
                                            bbox2z(bboxes_t2[
                                                order2[:n_matched12]]),
                                            kf_x,
                                            kf_P,
                                            kf_R,
                                        )

                                        # unmatched detections spawn new tracks
                                        kf_x_new = bbox2x(
                                            bboxes_t2[order2[n_matched12:]])
                                        n_unmatched2 = len(bboxes_t2) - n_matched12
                                        kf_P_new = kf_P_init.expand(
                                            n_unmatched2, -1, -1)
                                        kf_x = torch.cat((kf_x, kf_x_new))
                                        kf_P = torch.cat((kf_P, kf_P_new))
                                        labels = labels_t2[order2]
                                        scores = scores_t2[order2]
                                        updated = True

                                if not updated:
                                    # start from scratch
                                    kf_x = bbox2x(bboxes_t2)
                                    kf_P = kf_P_init.expand(
                                        len(bboxes_t2), -1, -1)
                                    labels = labels_t2
                                    scores = scores_t2
                                    if not given_tracks:
                                        tracks = np.arange(tkidx,
                                                           tkidx + n,
                                                           dtype=np.uint32)
                                        tkidx += n

                            t2 = perf_counter()
                            t_assoc.append(t2 - t1)

                    t3 = perf_counter()
                    if len(kf_x):
                        dt = ii - ifidx
                        w_img, h_img = img['width'], img['height']

                        # PyTorch is slow at small matrix multiplications,
                        # so the forecast is done in numpy instead of
                        # kf_F = make_F(kf_F, dt)
                        # bboxes_t3 = x2bbox(batch_kf_predict_only(kf_F, kf_x))
                        kf_x_np = kf_x[:, :, 0].numpy()
                        # linear extrapolation: position + dt * velocity
                        bboxes_t3 = (kf_x_np[:n_matched12, :4]
                                     + dt * kf_x_np[:n_matched12, 4:])
                        if n_matched12 < len(kf_x):
                            # tracks beyond the matched ones were newly
                            # initialized and have no velocity estimate yet,
                            # so keep their positions unchanged
                            bboxes_t3 = np.concatenate(
                                (bboxes_t3, kf_x_np[n_matched12:, :4]))

                        bboxes_t3, keep = extrap_clean_up(bboxes_t3,
                                                          w_img,
                                                          h_img,
                                                          lt=True)
                        # note: the KF state (kf_x, kf_P) and the per-track
                        # labels/scores/tracks are intentionally left
                        # unfiltered; keep only prunes this frame's output
                        labels_t3 = labels[keep]
                        scores_t3 = scores[keep]
                        tracks_t3 = tracks[keep]

                    t4 = perf_counter()
                    t_forecast.append(t4 - t3)

                n = len(bboxes_t3)
                for i in range(n):
                    result_dict = {
                        'image_id': img['id'],
                        'bbox': bboxes_t3[i],
                        'score': scores_t3[i],
                        'category_id': labels_t3[i],
                    }
                    results_ccf.append(result_dict)

                if vis_out:
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    vis_path = join(opts.vis_dir, seq,
                                    img['name'][:-3] + 'jpg')

                    bboxes = bboxes_t3.copy()
                    if n:
                        ltwh2ltrb_(bboxes)
                    if opts.overwrite or not isfile(vis_path):
                        # use the filtered (_t3) attributes so they stay
                        # aligned with bboxes_t3 after extrap_clean_up
                        vis_track(
                            I,
                            bboxes,
                            tracks_t3,
                            labels_t3,
                            class_names,
                            None,
                            scores_t3,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

    s2ms = lambda x: 1e3 * x
    if len(t_assoc):
        print_stats(t_assoc, "RT association (ms)", cvt=s2ms)
    if len(t_forecast):
        print_stats(t_forecast, "RT forecasting (ms)", cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
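
The forecasting loop above relies on batched Kalman-filter helpers (bbox2x, x2bbox, make_F, make_Q, batch_kf_predict, batch_kf_update) defined elsewhere in the repository. As a rough sketch of what they compute, here is a minimal single-track constant-velocity Kalman filter in numpy; the function names and noise constants are illustrative assumptions, not the repository's actual implementation.

import numpy as np

# Illustrative constant-velocity Kalman filter for one box (assumed names).
# State x = [l, t, w, h, vl, vt, vw, vh]; measurement z = [l, t, w, h].

def make_F_demo(dt):
    # transition: position += dt * velocity; velocity stays constant
    F = np.eye(8)
    F[:4, 4:] = dt * np.eye(4)
    return F

def kf_predict_demo(x, P, F, Q):
    # propagate mean and covariance forward in time
    x = F @ x
    P = F @ P @ F.T + Q
    return x, P

def kf_update_demo(x, P, z, R):
    # correct the prediction with a new (position-only) measurement
    H = np.eye(4, 8)                  # observation model: position only
    y = z - H @ x                     # innovation
    S = H @ P @ H.T + R               # innovation covariance
    K = P @ H.T @ np.linalg.inv(S)    # Kalman gain
    x = x + K @ y
    P = (np.eye(8) - K @ H) @ P
    return x, P

# usage: forecast a track 3 frames ahead, then correct with a new detection
x = np.array([10., 20., 50., 80., 0., 0., 0., 0.])
P, Q, R = np.eye(8), 1e-2 * np.eye(8), 1e-1 * np.eye(4)
x, P = kf_predict_demo(x, P, make_F_demo(3), Q)
x, P = kf_update_demo(x, P, np.array([13., 26., 50., 80.]), R)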
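
Similarly, iou_assoc is imported from elsewhere; judging by the call site, it greedily matches forecast track boxes to score-sorted detections of the same class, returns matched tracks first, and assigns fresh IDs to unmatched detections. A hypothetical sketch under those assumptions (all names here are made up for illustration):

import numpy as np

def iou_ltwh_demo(a, b):
    # IoU of two boxes in (left, top, width, height) format
    iw = max(0.0, min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0]))
    ih = max(0.0, min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1]))
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

def iou_assoc_demo(bboxes1, labels1, tracks1, tkidx,
                   bboxes2, labels2, iou_th):
    # greedily match each detection (already score-sorted) to the best
    # still-unmatched track of the same class
    matched1, matched2 = [], []
    free = set(range(len(bboxes1)))
    for j in range(len(bboxes2)):
        best_i, best_iou = -1, iou_th
        for i in free:
            if labels1[i] != labels2[j]:
                continue
            iou = iou_ltwh_demo(bboxes1[i], bboxes2[j])
            if iou > best_iou:
                best_i, best_iou = i, iou
        if best_i >= 0:
            matched1.append(best_i)
            matched2.append(j)
            free.remove(best_i)
    unmatched2 = [j for j in range(len(bboxes2)) if j not in matched2]
    order1 = np.asarray(matched1, dtype=np.int64)               # matched tracks
    order2 = np.asarray(matched2 + unmatched2, dtype=np.int64)  # matched dets first
    n_new = len(unmatched2)
    tracks = np.concatenate((tracks1[order1],
                             np.arange(tkidx, tkidx + n_new, dtype=np.uint32)))
    return order1, order2, len(matched1), tracks, tkidx + n_new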
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # results cached in ccf format are assumed to be stored
            # sequentially by image; keep a moving index to speed up lookups
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1  # mmdet only supports single-GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        if opts.cached_res and cache_in_ccf:
            # results cached in ccf format are already parsed;
            # raw results are unavailable
            results_raw = None
        else:
            results_raw = []

        for ii in range(n_frame):
            t = ii / opts.fps

            if opts.cached_res:
                img = frame_list[ii]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[ii]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_finishing = t + rt_this

            timestamps.append(t_finishing)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(ii)
            runtime.append(rt_this)

        # execution is simulated as parallel, so results may finish out of order
        idx = np.argsort(timestamps)
        timestamps = [timestamps[i] for i in idx]
        if results_raw is not None:
            results_raw = [results_raw[i] for i in idx]
        results_parsed = [results_parsed[i] for i in idx]
        input_fidx = [input_fidx[i] for i in idx]
        runtime = [runtime[i] for i in idx]

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(
            {
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
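
In the simulation above, frame ii is issued at ii / opts.fps, its detection finishes rt_this seconds later, and results are re-sorted by finish time because completion order need not match issue order. A self-contained toy of that bookkeeping (all values illustrative):

import numpy as np

fps = 30.0
n_frame = 5
rng = np.random.default_rng(0)

runtimes = rng.uniform(0.02, 0.08, n_frame)   # sampled per-frame latency (s)
t_start = np.arange(n_frame) / fps            # frame ii is issued at ii / fps
t_finish = t_start + runtimes                 # when each result becomes visible

idx = np.argsort(t_finish)                    # completion order, not issue order
for i in idx:
    print(f'frame {i}: issued {t_start[i]:.3f}s, finished {t_finish[i]:.3f}s')

# a runtime is "real-time" if it fits within one frame interval
n_small = int((runtimes < 1.0 / fps).sum())
print(f'{n_small}/{n_frame} runtimes under the frame interval')

At evaluation time, the while loop near the top of the forecasting loop recovers, for each output time t, the latest result with timestamp <= t; bisect.bisect_right(timestamps, t) would compute the same index in a single call.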