def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    results_raw = []    # image based, all 80 COCO classes
    results_ccf = []    # instance based
    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']
        sid = img['sid']
        seq_name = seqs[sid]

        img_path = join(opts.data_root, seq_dirs[sid], img_name)
        I = imread(img_path)

        result = inference_detector(model, I, gpu_pre=not opts.cpu_pre)
        results_raw.append(result)
        bboxes, scores, labels, masks = \
            parse_det_result(result, coco_mapping, n_class)

        if vis_out:
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            if opts.overwrite or not isfile(vis_path):
                vis_det(
                    I, bboxes, labels,
                    class_names, masks, scores,
                    out_scale=opts.vis_scale,
                    out_file=vis_path,
                )

        # convert to coco fmt
        n = len(bboxes)
        if n:
            ltrb2ltwh_(bboxes)

        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes[i],
                'score': scores[i],
                'category_id': labels[i],
            }
            if masks is not None:
                result_dict['segmentation'] = masks[i]
            results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'results_raw.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_raw, open(out_path, 'wb'))

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
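
# A note on the box conversion above: COCO result files store boxes as
# [left, top, width, height], while the detector outputs [left, top, right,
# bottom]. A minimal numpy sketch of what ltrb2ltwh_ is assumed to do in place
# (the helper's actual implementation lives elsewhere in this repo; the name
# ltrb2ltwh_sketch below is hypothetical):
#
# import numpy as np
#
# def ltrb2ltwh_sketch(bboxes):
#     # bboxes: (n, 4) float array of [l, t, r, b]; rewritten in place to [l, t, w, h]
#     bboxes[:, 2:] -= bboxes[:, :2]
#     return bboxes
#
# ltrb2ltwh_sketch(np.array([[10., 20., 110., 220.]]))  # -> [[10., 20., 100., 200.]]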
def main():
    opts = parse_args()

    out_dir = mkdir2(opts.out_dir) if opts.out_dir else opts.result_dir
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    mismatch = 0

    print('Pairing the output with the ground truth')

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        results = pickle.load(open(join(opts.result_dir, seq + '.pkl'), 'rb'))
        # use raw results when possible in case we change class subset during evaluation
        if opts.use_parsed:
            results_parsed = results['results_parsed']
        else:
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
        timestamps = results['timestamps']
        input_fidx = results['input_fidx']

        tidx_p1 = 0
        for ii, img in enumerate(frame_list):
            # pred, gt association by time
            t = (ii - opts.eta)/opts.fps
            while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
                tidx_p1 += 1
            if tidx_p1 == 0:
                # no output
                miss += 1
                bboxes, scores, labels = [], [], []
                masks, tracks = None, None
            else:
                tidx = tidx_p1 - 1
                ifidx = input_fidx[tidx]
                in_time += int(ii == ifidx)
                mismatch += ii - ifidx

                if opts.use_parsed or results_raw is None:
                    result = results_parsed[tidx]
                    bboxes, scores, labels, masks = result[:4]
                    if len(result) > 4:
                        tracks = result[4]
                    else:
                        tracks = None
                else:
                    result = results_raw[tidx]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
                    tracks = None

            if vis_out:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                I = imread(img_path)
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')

                if opts.overwrite or not isfile(vis_path):
                    if tracks is None:
                        vis_det(
                            I, bboxes, labels,
                            class_names, masks, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )
                    else:
                        vis_track(
                            I, bboxes, tracks, labels,
                            class_names, masks, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

            # convert to coco fmt
            n = len(bboxes)
            if n:
                bboxes_ltwh = ltrb2ltwh(bboxes)

            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes_ltwh[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                if masks is not None:
                    result_dict['segmentation'] = masks[i]
                results_ccf.append(result_dict)

    out_path = join(out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    out_path = join(out_dir, 'eval_assoc.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'miss': miss,
            'in_time': in_time,
            'mismatch': mismatch,
        }, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
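
# The streaming evaluation above pairs the ground truth at query time t with the
# most recent output finished by t (the tidx_p1 scan); frames with no finished
# output count as misses. A minimal self-contained sketch of that association
# rule (latest_output_index is a hypothetical name, not a repo helper):
#
# def latest_output_index(timestamps, t):
#     """Index of the last output with timestamp <= t, or None if none exists."""
#     idx = None
#     for i, ts in enumerate(timestamps):
#         if ts > t:
#             break
#         idx = i
#     return idx
#
# latest_output_index([0.04, 0.11, 0.19], 0.05)  # -> 0 (only the first output is ready)
# latest_output_index([0.04, 0.11, 0.19], 0.00)  # -> None (counts as a miss above)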
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        t_total = n_frame/opts.fps
        t_elapsed = 0
        if opts.dynamic_schedule:
            mean_rtf = runtime_dist.mean()*opts.fps
        else:
            stride_cnt = 0

        while 1:
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continous))
            if fidx == last_fidx:
                # algorithm is fast and has some idle time
                fidx += 1
                if fidx == n_frame:
                    break
                t_elapsed = fidx/opts.fps
            last_fidx = fidx

            if opts.dynamic_schedule:
                if mean_rtf > 1:
                    # when runtime <= 1, it should always process every frame
                    fidx_remainder = fidx_continous - fidx
                    if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                        # wait till next frame
                        continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            if opts.cached_res:
                img = frame_list[fidx]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[fidx]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_elapsed += rt_this
            if t_elapsed >= t_total:
                break

            timestamps.append(t_elapsed)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(rt_this)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
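
# The dynamic schedule above decides between processing the newest frame now and
# idling until the next one arrives, based on the mean runtime expressed in
# frame units (mean_rtf = mean runtime * fps). A minimal sketch mirroring that
# decision; should_wait_for_next_frame is a hypothetical name, and the policy
# semantics are assumed from the condition in the loop above:
#
# import numpy as np
#
# def should_wait_for_next_frame(fidx_continous, mean_rtf):
#     """True if processing should be postponed until the next frame."""
#     if mean_rtf <= 1:
#         return False  # faster than the frame rate: process every frame
#     fidx_remainder = fidx_continous - np.floor(fidx_continous)
#     return bool(mean_rtf < np.floor(fidx_remainder + mean_rtf))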
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
    opts = parse_args()
    assert (not opts.mask_timing) or (opts.mask_timing and not opts.no_mask)

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame

        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if not opts.dynamic_schedule:
            stride_cnt = 0

        t_total = n_frame/opts.fps
        t_start = perf_counter()
        while 1:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continous))
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            if opts.dynamic_schedule:
                fidx_remainder = fidx_continous - fidx
                if fidx_remainder > 0.5:
                    continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            frame = frames[fidx]
            result = inference_detector(model, frame,
                gpu_pre=not opts.cpu_pre, decode_mask=not opts.mask_timing)
            if opts.mask_timing:
                mask_encoded = result[2]
                result = result[:2]
                bboxes, scores, labels, _, sel = \
                    parse_det_result(result, coco_mapping, n_class, return_sel=True)
            else:
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            torch.cuda.synchronize()
            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break
            if opts.mask_timing:
                masks = decode_mask(mask_encoded, sel)

            timestamps.append(t_elapsed)
            results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump({
                'results_raw': results_raw,
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_raw)

    runtime_all_np = np.asarray(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
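
# CUDA kernels launch asynchronously, so perf_counter() alone would only time
# the Python-side launches; that is why the loop above calls
# torch.cuda.synchronize() before reading the clock (and warms up the GPU once
# before timing). The same pattern as a small reusable helper; timed() is a
# hypothetical name, not part of this repo:
#
# from time import perf_counter
# import torch
#
# def timed(fn, *args, **kwargs):
#     """Run fn and return (result, elapsed seconds), syncing CUDA if available."""
#     if torch.cuda.is_available():
#         torch.cuda.synchronize()
#     t1 = perf_counter()
#     out = fn(*args, **kwargs)
#     if torch.cuda.is_available():
#         torch.cuda.synchronize()
#     return out, perf_counter() - t1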
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']

    mp.set_start_method('spawn')
    frame_recv, frame_send = mp.Pipe(False)
    det_res_recv, det_res_send = mp.Pipe(False)
    det_proc = mp.Process(target=det_process,
        args=(opts, frame_recv, det_res_send, w_img, h_img))
    det_proc.start()

    if opts.dynamic_schedule:
        runtime = pickle.load(open(opts.runtime, 'rb'))
        runtime_dist = dist_from_dict(runtime, opts.perf_factor)
        mean_rtf = runtime_dist.mean()*opts.fps

    n_total = 0
    t_det_all = []
    t_send_frame_all = []
    t_recv_res_all = []
    t_assoc_all = []
    t_forecast_all = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        kf_Q = torch.eye(8)
        kf_R = 10*torch.eye(4)
        kf_P_init = 100*torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
            frame_list = [join(opts.data_root, seq_dirs[sid], img['name'])
                for img in frame_list]
            n_frame = len(frame_list)
            n_total += n_frame

            timestamps = []
            results_parsed = []
            input_fidx = []

            processing = False
            fidx_t2 = None      # detection input index at t2
            fidx_latest = None
            tkidx = 0           # track starting index
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            # let detector process to read all the frames
            frame_send.send(frame_list)
            # it is possible that unfetched results remain in the pipe
            while 1:
                msg = det_res_recv.recv() # wait till the detector is ready
                if msg == 'ready':
                    break
                elif isinstance(msg, Exception):
                    raise msg

            t_total = n_frame/opts.fps
            t_unit = 1/opts.fps
            t_start = perf_counter()
            while 1:
                t1 = perf_counter()
                t_elapsed = t1 - t_start
                if t_elapsed >= t_total:
                    break

                # identify latest available frame
                fidx_continous = t_elapsed*opts.fps
                fidx = int(np.floor(fidx_continous))
                if fidx == fidx_latest:
                    # algorithm is fast and has some idle time
                    wait_for_next = True
                else:
                    wait_for_next = False
                    if opts.dynamic_schedule:
                        if mean_rtf >= 1:
                            # when runtime < 1, it should always process every frame
                            fidx_remainder = fidx_continous - fidx
                            if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                                # wait till next frame
                                wait_for_next = True

                if wait_for_next:
                    # sleep
                    continue

                if not processing:
                    t_start_frame = perf_counter()
                    frame_send.send((fidx, t_start_frame))
                    fidx_latest = fidx
                    processing = True

                # wait till query - forecast-rt-ub
                wait_time = t_unit - opts.forecast_rt_ub
                if det_res_recv.poll(wait_time): # wait
                    # new result
                    result = det_res_recv.recv()
                    if isinstance(result, Exception):
                        raise result
                    result, t_send_frame, t_start_res = result
                    bboxes_t2, scores_t2, labels_t2, _ = \
                        parse_det_result(result, coco_mapping, n_class)
                    processing = False
                    t_det_end = perf_counter()
                    t_det_all.append(t_det_end - t_start_frame)
                    t_send_frame_all.append(t_send_frame)
                    t_recv_res_all.append(t_det_end - t_start_res)

                    # associate across frames
                    t_assoc_start = perf_counter()
                    if len(kf_x):
                        dt = fidx_latest - fidx_t2

                        kf_F = make_F(kf_F, dt)
                        kf_Q = make_Q(kf_Q, dt)
                        kf_x, kf_P = batch_kf_predict(kf_F, kf_x, kf_P, kf_Q)
                        bboxes_f = x2bbox(kf_x)

                    fidx_t2 = fidx_latest

                    n = len(bboxes_t2)
                    if n:
                        # put high scores det first for better iou matching
                        score_argsort = np.argsort(scores_t2)[::-1]
                        bboxes_t2 = bboxes_t2[score_argsort]
                        scores_t2 = scores_t2[score_argsort]
                        labels_t2 = labels_t2[score_argsort]

                        ltrb2ltwh_(bboxes_t2)

                    updated = False
                    if len(kf_x):
                        order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                            bboxes_f, labels, tracks, tkidx,
                            bboxes_t2, labels_t2, opts.match_iou_th,
                            no_unmatched1=True,
                        )

                        if n_matched12:
                            kf_x = kf_x[order1]
                            kf_P = kf_P[order1]
                            kf_x, kf_P = batch_kf_update(
                                bbox2z(bboxes_t2[order2[:n_matched12]]),
                                kf_x,
                                kf_P,
                                kf_R,
                            )

                            kf_x_new = bbox2x(bboxes_t2[order2[n_matched12:]])
                            n_unmatched2 = len(bboxes_t2) - n_matched12
                            kf_P_new = kf_P_init.expand(n_unmatched2, -1, -1)
                            kf_x = torch.cat((kf_x, kf_x_new))
                            kf_P = torch.cat((kf_P, kf_P_new))
                            labels = labels_t2[order2]
                            scores = scores_t2[order2]
                            updated = True

                    if not updated:
                        # start from scratch
                        kf_x = bbox2x(bboxes_t2)
                        kf_P = kf_P_init.expand(len(bboxes_t2), -1, -1)
                        labels = labels_t2
                        scores = scores_t2
                        tracks = np.arange(tkidx, tkidx + n, dtype=np.uint32)
                        tkidx += n

                    t_assoc_end = perf_counter()
                    t_assoc_all.append(t_assoc_end - t_assoc_start)

                # apply forecasting for the current query
                t_forecast_start = perf_counter()
                query_pointer = fidx + opts.eta + 1

                if len(kf_x):
                    dt = (query_pointer - fidx_t2)

                    kf_x_np = kf_x[:, :, 0].numpy()
                    bboxes_t3 = kf_x_np[:n_matched12, :4] + dt*kf_x_np[:n_matched12, 4:]
                    if n_matched12 < len(kf_x):
                        bboxes_t3 = np.concatenate((bboxes_t3, kf_x_np[n_matched12:, :4]))

                    bboxes_t3, keep = extrap_clean_up(bboxes_t3, w_img, h_img, lt=True)
                    labels_t3 = labels[keep]
                    scores_t3 = scores[keep]
                    tracks_t3 = tracks[keep]
                else:
                    bboxes_t3 = np.empty((0, 4), dtype=np.float32)
                    scores_t3 = np.empty((0,), dtype=np.float32)
                    labels_t3 = np.empty((0,), dtype=np.int32)
                    tracks_t3 = np.empty((0,), dtype=np.int32)

                t_forecast_end = perf_counter()
                t_forecast_all.append(t_forecast_end - t_forecast_start)

                t3 = perf_counter()
                t_elapsed = t3 - t_start
                if t_elapsed >= t_total:
                    break

                if len(bboxes_t3):
                    ltwh2ltrb_(bboxes_t3)
                if fidx_t2 is not None:
                    timestamps.append(t_elapsed)
                    results_parsed.append((bboxes_t3, scores_t3, labels_t3, None, tracks_t3))
                    input_fidx.append(fidx_t2)

            out_path = join(opts.out_dir, seq + '.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump({
                    'results_parsed': results_parsed,
                    'timestamps': timestamps,
                    'input_fidx': input_fidx,
                }, open(out_path, 'wb'))

    # terminates the child process
    frame_send.send(None)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'n_total': n_total,
            't_det': t_det_all,
            't_send_frame': t_send_frame_all,
            't_recv_res': t_recv_res_all,
            't_assoc': t_assoc_all,
            't_forecast': t_forecast_all,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print_stats(t_det_all, 'Runtime detection (ms)', cvt=s2ms)
    print_stats(t_send_frame_all, 'Runtime sending the frame (ms)', cvt=s2ms)
    print_stats(t_recv_res_all, 'Runtime receiving the result (ms)', cvt=s2ms)
    print_stats(t_assoc_all, 'Runtime association (ms)', cvt=s2ms)
    print_stats(t_forecast_all, 'Runtime forecasting (ms)', cvt=s2ms)
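
# The forecasting step above relies on an assumed 8-D constant-velocity state
# per track: [l, t, w, h] plus the velocity of each coordinate. Extrapolating
# to the query time is then a linear update of the first four entries, while
# unmatched tracks (no velocity estimate yet) hold their last position. A
# minimal numpy sketch of exactly that arithmetic (forecast_boxes is a
# hypothetical name):
#
# import numpy as np
#
# def forecast_boxes(kf_x_np, n_matched, dt):
#     """kf_x_np: (n, 8) states; only the first n_matched rows have velocities."""
#     pred = kf_x_np[:n_matched, :4] + dt*kf_x_np[:n_matched, 4:]
#     if n_matched < len(kf_x_np):
#         # new tracks: no velocity yet, keep the last observed box
#         pred = np.concatenate((pred, kf_x_np[n_matched:, :4]))
#     return pred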
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
    opts = parse_args()

    mp.set_start_method('spawn')
    frame_recv, frame_send = mp.Pipe(False)
    det_res_recv, det_res_send = mp.Pipe(False)
    det_proc = mp.Process(target=det_process, args=(opts, frame_recv, det_res_send))
    det_proc.start()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    runtime_all = []
    n_processed = 0
    n_total = 0
    t_send_frame_all = []
    t_recv_res_all = []

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        frame_list = [join(opts.data_root, seq_dirs[sid], img['name'])
            for img in frame_list]
        n_frame = len(frame_list)
        n_total += n_frame

        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        stride_cnt = 0

        # let detector process to read all the frames
        frame_send.send(frame_list)
        init_error = det_res_recv.recv() # wait till the detector is ready
        if init_error is not None:
            raise init_error

        t_total = n_frame/opts.fps
        t_start = perf_counter()
        while 1:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            # t_elapsed/t_total*n_frame = t_elapsed*opts.fps
            fidx = int(np.floor(t_elapsed*opts.fps))
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            if stride_cnt % opts.det_stride == 0:
                stride_cnt = 1
            else:
                stride_cnt += 1
                continue

            t_start_frame = perf_counter()
            frame_send.send((fidx, t_start_frame))
            result = det_res_recv.recv() # wait
            if isinstance(result, Exception):
                raise result
            result, t_send_frame, t_start_res = result
            bboxes, scores, labels, masks = \
                parse_det_result(result, coco_mapping, n_class)

            t2 = perf_counter()
            t_send_frame_all.append(t_send_frame)
            t_recv_res_all.append(t2 - t_start_res)
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                break

            timestamps.append(t_elapsed)
            results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump({
                'results_raw': results_raw,
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_raw)

    # terminates the child process
    frame_send.send(None)

    runtime_all_np = np.asarray(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
    print_stats(t_send_frame_all, 'Time spent on sending the frame (ms)', cvt=s2ms)
    print_stats(t_recv_res_all, 'Time spent on receiving the result (ms)', cvt=s2ms)
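
# The loop above offloads detection to a child process connected by two one-way
# pipes: mp.Pipe(False) returns a (receive-end, send-end) pair. A minimal,
# runnable sketch of the same producer/consumer pattern, with stub_worker as a
# hypothetical stand-in for the real det_process:
#
# import multiprocessing as mp
#
# def stub_worker(task_recv, res_send):
#     res_send.send(None)              # signal "ready" (no init error)
#     while True:
#         task = task_recv.recv()
#         if task is None:             # sentinel: shut down
#             break
#         res_send.send(task*2)        # stand-in for running the detector
#
# if __name__ == '__main__':
#     task_recv, task_send = mp.Pipe(False)
#     res_recv, res_send = mp.Pipe(False)
#     proc = mp.Process(target=stub_worker, args=(task_recv, res_send))
#     proc.start()
#     assert res_recv.recv() is None   # wait until the worker is ready
#     task_send.send(21)
#     print(res_recv.recv())           # 42
#     task_send.send(None)
#     proc.join()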
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)
    if opts.weights_base is not None:
        # for distillation purpose
        load_checkpoint(model, opts.weights_base)
    if opts.cpu_pre:
        img_transform = ImageTransform(
            size_divisor=model.cfg.data.test.size_divisor, **model.cfg.img_norm_cfg)
    else:
        img_transform = ImageTransformGPU(
            size_divisor=model.cfg.data.test.size_divisor, **model.cfg.img_norm_cfg)
    device = next(model.parameters()).device # model device
    n_history = model.cfg.data.train.n_history if opts.n_history is None else opts.n_history
    n_future = model.cfg.data.train.n_future if opts.n_future is None else opts.n_future

    results_ccf = []    # instance based
    runtime_all = []
    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))

        with torch.no_grad():
            preprocessed = []
            for i in range(n_history):
                data = _prepare_data(frames[i], img_transform, model.cfg, device)
                preprocessed.append(data)

            for ii in range(n_history, n_frame - n_future):
                # target frame
                iid = frame_list[ii + n_future]['id']
                img_name = frame_list[ii + n_future]['name']
                I = frames[ii + n_future]

                t_start = perf_counter()

                # input frame
                data = _prepare_data(frames[ii], img_transform, model.cfg, device)
                preprocessed.append(data)

                imgs = [d['img'][0] for d in preprocessed]
                imgs = torch.cat(imgs, 0)
                imgs = imgs.unsqueeze(0)
                data_merge = {
                    'img': [imgs],
                    'img_meta': data['img_meta'],
                }

                result = model(return_loss=False, rescale=True,
                    numpy_res=True, **data_merge)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

                del preprocessed[0]

                t_end = perf_counter()
                runtime_all.append(t_end - t_start)

                if vis_out:
                    vis_path = join(opts.vis_dir, seq, img_name[:-3] + 'jpg')
                    if opts.overwrite or not isfile(vis_path):
                        vis_det(
                            I, bboxes, labels,
                            class_names, masks, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

                # convert to coco fmt
                n = len(bboxes)
                if n:
                    bboxes[:, 2:] -= bboxes[:, :2]

                for i in range(n):
                    result_dict = {
                        'image_id': iid,
                        'bbox': bboxes[i],
                        'score': scores[i],
                        'category_id': labels[i],
                    }
                    if masks is not None:
                        result_dict['segmentation'] = masks[i]
                    results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_total': len(runtime_all),
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print_stats(runtime_all, 'Runtime (ms)', cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
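
# The loop above keeps a sliding window of n_history + 1 preprocessed frames and
# stacks them into a single input before each forward pass. A minimal
# tensor-only sketch of that windowing step, assuming each preprocessed image is
# a (1, C, H, W) tensor as produced by mmdet's test pipeline (merge_window is a
# hypothetical name):
#
# import torch
#
# def merge_window(window):
#     """window: list of T (1, C, H, W) tensors -> one (1, T, C, H, W) input."""
#     imgs = torch.cat(window, 0)      # (T, C, H, W)
#     return imgs.unsqueeze(0)         # add the batch dimension back
#
# merge_window([torch.zeros(1, 3, 8, 8)]*3).shape  # -> torch.Size([1, 3, 3, 8, 8])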
def main():
    opts = parse_args()
    assert opts.forecast_before_assoc, "Not implemented"

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    shifts = 0

    given_tracks = opts.assoc == 'given'
    assert not given_tracks, "Not implemented"

    t_assoc = []
    t_forecast = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        kf_Q = torch.eye(8)
        kf_R = 10*torch.eye(4)
        kf_P_init = 100*torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

            results = pickle.load(open(join(opts.in_dir, seq + '.pkl'), 'rb'))
            # use raw results when possible in case we change class subset during evaluation
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
            timestamps = results['timestamps']
            input_fidx = results['input_fidx']

            # t1 -> det1, t2 -> det2, interpolate at t3 (t3 is the current time)
            det_latest_p1 = 0   # latest detection index + 1
            det_t2 = None       # detection index at t2
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0
            if not given_tracks:
                tkidx = 0       # track starting index

            for ii, img in enumerate(frame_list):
                # pred, gt association by time
                t = (ii - opts.eta)/opts.fps
                while det_latest_p1 < len(timestamps) and timestamps[det_latest_p1] <= t:
                    det_latest_p1 += 1
                if det_latest_p1 == 0:
                    # no detection output
                    miss += 1
                    bboxes_t3, scores, labels, tracks = [], [], [], []
                else:
                    det_latest = det_latest_p1 - 1
                    ifidx = input_fidx[det_latest]
                    in_time += int(ii == ifidx)
                    shifts += ii - ifidx

                    if det_latest != det_t2:
                        # new detection
                        # we can now throw away old result (t1)
                        # the old one is kept for forecasting purpose
                        if len(kf_x) and opts.forecast_before_assoc:
                            dt = ifidx - input_fidx[det_t2]
                            dt = int(dt) # convert from numpy to basic python format
                            w_img, h_img = img['width'], img['height']

                            kf_F = make_F(kf_F, dt)
                            kf_Q = make_Q(kf_Q, dt)
                            kf_x, kf_P = batch_kf_predict(kf_F, kf_x, kf_P, kf_Q)
                            bboxes_f = x2bbox(kf_x)

                        det_t2 = det_latest
                        if results_raw is None:
                            result_t2 = results_parsed[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = result_t2[:4]
                            if not given_tracks and len(result_t2) > 4:
                                tracks_t2 = np.asarray(result_t2[4])
                            else:
                                tracks_t2 = np.empty((0,), np.uint32)
                        else:
                            result_t2 = results_raw[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = \
                                parse_det_result(result_t2, coco_mapping, n_class)

                        t1 = perf_counter()
                        n = len(bboxes_t2)
                        if n:
                            if not given_tracks:
                                # put high scores det first for better iou matching
                                score_argsort = np.argsort(scores_t2)[::-1]
                                bboxes_t2 = bboxes_t2[score_argsort]
                                scores_t2 = scores_t2[score_argsort]
                                labels_t2 = labels_t2[score_argsort]

                            ltrb2ltwh_(bboxes_t2)

                        if given_tracks:
                            raise NotImplementedError
                            order1, order2, n_matched12 = track_based_shuffle(
                                tracks, tracks_t2, no_unmatched1=True)
                            tracks = tracks[order2]
                        else:
                            updated = False
                            if len(kf_x):
                                order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                                    bboxes_f, labels, tracks, tkidx,
                                    bboxes_t2, labels_t2, opts.match_iou_th,
                                    no_unmatched1=True,
                                )

                                if n_matched12:
                                    kf_x = kf_x[order1]
                                    kf_P = kf_P[order1]
                                    kf_x, kf_P = batch_kf_update(
                                        bbox2z(bboxes_t2[order2[:n_matched12]]),
                                        kf_x,
                                        kf_P,
                                        kf_R,
                                    )

                                    kf_x_new = bbox2x(bboxes_t2[order2[n_matched12:]])
                                    n_unmatched2 = len(bboxes_t2) - n_matched12
                                    kf_P_new = kf_P_init.expand(n_unmatched2, -1, -1)
                                    kf_x = torch.cat((kf_x, kf_x_new))
                                    kf_P = torch.cat((kf_P, kf_P_new))
                                    labels = labels_t2[order2]
                                    scores = scores_t2[order2]
                                    updated = True

                            if not updated:
                                # start from scratch
                                kf_x = bbox2x(bboxes_t2)
                                kf_P = kf_P_init.expand(len(bboxes_t2), -1, -1)
                                labels = labels_t2
                                scores = scores_t2
                                if not given_tracks:
                                    tracks = np.arange(tkidx, tkidx + n, dtype=np.uint32)
                                    tkidx += n

                        t2 = perf_counter()
                        t_assoc.append(t2 - t1)

                    t3 = perf_counter()
                    if len(kf_x):
                        dt = ii - ifidx
                        w_img, h_img = img['width'], img['height']

                        # PyTorch small matrix multiplication is slow, use numpy instead
                        kf_x_np = kf_x[:, :, 0].numpy()
                        bboxes_t3 = kf_x_np[:n_matched12, :4] + dt*kf_x_np[:n_matched12, 4:]
                        if n_matched12 < len(kf_x):
                            bboxes_t3 = np.concatenate(
                                (bboxes_t3, kf_x_np[n_matched12:, :4]))

                        bboxes_t3, keep = extrap_clean_up(bboxes_t3, w_img, h_img, lt=True)
                        labels_t3 = labels[keep]
                        scores_t3 = scores[keep]
                        tracks_t3 = tracks[keep]
                    else:
                        # no tracks to forecast from
                        bboxes_t3 = np.empty((0, 4), dtype=np.float32)
                        scores_t3 = np.empty((0,), dtype=np.float32)
                        labels_t3 = np.empty((0,), dtype=np.int32)
                        tracks_t3 = np.empty((0,), dtype=np.int32)
                    t4 = perf_counter()
                    t_forecast.append(t4 - t3)

                n = len(bboxes_t3)
                for i in range(n):
                    result_dict = {
                        'image_id': img['id'],
                        'bbox': bboxes_t3[i],
                        'score': scores_t3[i],
                        'category_id': labels_t3[i],
                    }
                    results_ccf.append(result_dict)

                if vis_out:
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')

                    bboxes = bboxes_t3.copy()
                    if n:
                        ltwh2ltrb_(bboxes)
                    if opts.overwrite or not isfile(vis_path):
                        vis_track(
                            I, bboxes, tracks, labels,
                            class_names, None, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

    s2ms = lambda x: 1e3*x
    if len(t_assoc):
        print_stats(t_assoc, 'Runtime association (ms)', cvt=s2ms)
    if len(t_forecast):
        print_stats(t_forecast, 'Runtime forecasting (ms)', cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
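
# iou_assoc above matches forecasted boxes to new detections by IoU within the
# same class; its implementation lives elsewhere in this repo. A minimal sketch
# of greedy same-label IoU matching, assuming [l, t, w, h] boxes already sorted
# by descending score as done above (iou_ltwh and greedy_iou_match are
# hypothetical names, and the exact tie-breaking of the real helper may differ):
#
# import numpy as np
#
# def iou_ltwh(a, b):
#     """IoU of two [l, t, w, h] boxes."""
#     lt = np.maximum(a[:2], b[:2])
#     rb = np.minimum(a[:2] + a[2:], b[:2] + b[2:])
#     wh = np.clip(rb - lt, 0, None)
#     inter = wh[0]*wh[1]
#     union = a[2]*a[3] + b[2]*b[3] - inter
#     return inter/union if union > 0 else 0.0
#
# def greedy_iou_match(boxes1, labels1, boxes2, labels2, iou_th):
#     """Return (i, j) index pairs matching boxes1 to boxes2."""
#     matches, used = [], set()
#     for i, (b1, l1) in enumerate(zip(boxes1, labels1)):
#         best_j, best_iou = -1, iou_th
#         for j, (b2, l2) in enumerate(zip(boxes2, labels2)):
#             if j in used or l1 != l2:
#                 continue
#             iou = iou_ltwh(b1, b2)
#             if iou > best_iou:
#                 best_j, best_iou = j, iou
#         if best_j >= 0:
#             matches.append((i, best_j))
#             used.add(best_j)
#     return matches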
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        for ii in range(n_frame):
            t = ii/opts.fps

            if opts.cached_res:
                img = frame_list[ii]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[ii]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_finishing = t + rt_this

            timestamps.append(t_finishing)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(ii)
            runtime.append(rt_this)

        # since parallel execution, the order is not guaranteed
        idx = np.argsort(timestamps)
        timestamps = [timestamps[i] for i in idx]
        if results_raw is not None:
            results_raw = [results_raw[i] for i in idx]
        results_parsed = [results_parsed[i] for i in idx]
        input_fidx = [input_fidx[i] for i in idx]
        runtime = [runtime[i] for i in idx]

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
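
# With unlimited parallelism, frame ii starts processing as soon as it arrives
# and finishes at ii/fps + runtime, so outputs can complete out of order; the
# argsort above restores completion order across all per-frame lists. A minimal
# sketch of reordering several parallel lists by one key (sort_by_finish_time is
# a hypothetical name):
#
# import numpy as np
#
# def sort_by_finish_time(timestamps, *lists):
#     idx = np.argsort(timestamps)
#     sorted_ts = [timestamps[i] for i in idx]
#     sorted_rest = [[lst[i] for i in idx] for lst in lists]
#     return sorted_ts, sorted_rest
#
# sort_by_finish_time([0.3, 0.1, 0.2], [0, 1, 2])
# # -> ([0.1, 0.2, 0.3], [[1, 2, 0]])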