def main():
    assert torch.cuda.device_count() == 1  # mmdet only supports single-GPU testing
    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    results_raw = []    # image based, all 80 COCO classes
    results_ccf = []    # instance based
    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']
        sid = img['sid']
        seq_name = seqs[sid]

        img_path = join(opts.data_root, seq_dirs[sid], img_name)
        I = imread(img_path)

        result = inference_detector(model, I, gpu_pre=not opts.cpu_pre)
        results_raw.append(result)
        bboxes, scores, labels, masks = \
            parse_det_result(result, coco_mapping, n_class)

        if vis_out:
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            if opts.overwrite or not isfile(vis_path):
                vis_det(
                    I, bboxes, labels,
                    class_names, masks, scores,
                    out_scale=opts.vis_scale,
                    out_file=vis_path,
                )

        # convert to coco fmt
        n = len(bboxes)
        if n:
            ltrb2ltwh_(bboxes)

        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes[i],
                'score': scores[i],
                'category_id': labels[i],
            }
            if masks is not None:
                result_dict['segmentation'] = masks[i]
            results_ccf.append(result_dict)

    out_path = join(opts.out_dir, 'results_raw.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_raw, open(out_path, 'wb'))

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
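# `parse_det_result` is a repo helper used throughout these scripts. As a
# reference, here is a minimal, hypothetical sketch of its assumed behavior,
# given mmdet-1.x-style detector output (a per-class list of (N, 5) arrays,
# optionally paired with per-class mask lists) and assuming `coco_mapping`
# maps model class indices to dataset category indices; the actual helper
# may differ in details.
import itertools
import numpy as np

def parse_det_result_sketch(result, coco_mapping=None, n_class=None):
    if isinstance(result, tuple):
        bbox_result, segm_result = result   # detector with masks
    else:
        bbox_result, segm_result = result, None
    # flatten the per-class lists into (N, 5) boxes and an (N,) label array
    bboxes = np.vstack(bbox_result)
    labels = np.concatenate([np.full(len(b), c, dtype=np.int64)
                             for c, b in enumerate(bbox_result)])
    masks = list(itertools.chain(*segm_result)) if segm_result is not None else None
    scores = bboxes[:, 4]
    bboxes = bboxes[:, :4]                  # l, t, r, b
    if coco_mapping is not None:
        labels = coco_mapping[labels]
        keep = labels < n_class             # drop classes outside the dataset subset
        bboxes, scores, labels = bboxes[keep], scores[keep], labels[keep]
        if masks is not None:
            masks = [m for m, k in zip(masks, keep) if k]
    return bboxes, scores, labels, masks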
for fid, name in enumerate(seq_imgs):
    det = pickle.load(open(join(det_dir, seq, name[:-3] + 'pkl'), 'rb'))
    bboxes, masks = det
    bboxes = [bboxes[c] for c in class_subset]
    masks = [masks[c] for c in class_subset]

    # convert to coco fmt
    for c, row in enumerate(bboxes):
        for i in range(row.shape[0]):
            bbox = row[i]
            score = bbox[4]
            if score < det_th:
                continue
            bbox = bbox[:4]
            ltrb2ltwh_(bbox)
            mask = masks[c][i]
            mask['counts'] = mask['counts'].decode(encoding='UTF-8')
            annots.append({
                'id': aid,
                'image_id': iid,
                'bbox': bbox.tolist(),
                'category_id': c,
                'segmentation': mask,
                'area': float(maskUtils.area(mask)),
                'iscrowd': False,
                'ignore': False,
            })
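# `ltrb2ltwh_` and `ltwh2ltrb_` convert between corner format (l, t, r, b)
# and the COCO box format (l, t, w, h); the trailing underscore marks them as
# in-place, following the PyTorch naming convention. A minimal sketch of the
# assumed semantics, working on a single (4,) box or an (N, 4) array:
def ltrb2ltwh_sketch(bboxes):
    # (l, t, r, b) -> (l, t, w, h), modifying the input array in place
    bboxes[..., 2:] -= bboxes[..., :2]
    return bboxes

def ltwh2ltrb_sketch(bboxes):
    # (l, t, w, h) -> (l, t, r, b), modifying the input array in place
    bboxes[..., 2:] += bboxes[..., :2]
    return bboxes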
def main():
    assert torch.cuda.device_count() == 1  # mmdet only supports single-GPU testing
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']

    mp.set_start_method('spawn')
    frame_recv, frame_send = mp.Pipe(False)
    det_res_recv, det_res_send = mp.Pipe(False)
    det_proc = mp.Process(target=det_process, args=(opts, frame_recv, det_res_send, w_img, h_img))
    det_proc.start()

    if opts.dynamic_schedule:
        runtime = pickle.load(open(opts.runtime, 'rb'))
        runtime_dist = dist_from_dict(runtime, opts.perf_factor)
        mean_rtf = runtime_dist.mean()*opts.fps

    n_total = 0
    t_det_all = []
    t_send_frame_all = []
    t_recv_res_all = []
    t_assoc_all = []
    t_forecast_all = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        kf_Q = torch.eye(8)
        kf_R = 10*torch.eye(4)
        kf_P_init = 100*torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
            frame_list = [join(opts.data_root, seq_dirs[sid], img['name']) for img in frame_list]
            n_frame = len(frame_list)
            n_total += n_frame

            timestamps = []
            results_parsed = []
            input_fidx = []

            processing = False
            fidx_t2 = None      # detection input index at t2
            fidx_latest = None
            tkidx = 0           # track starting index
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            # let the detector process read all the frames
            frame_send.send(frame_list)
            # it is possible that unfetched results remain in the pipe
            while 1:
                msg = det_res_recv.recv()  # wait till the detector is ready
                if msg == 'ready':
                    break
                elif isinstance(msg, Exception):
                    raise msg

            t_total = n_frame/opts.fps
            t_unit = 1/opts.fps
            t_start = perf_counter()
            while 1:
                t1 = perf_counter()
                t_elapsed = t1 - t_start
                if t_elapsed >= t_total:
                    break

                # identify the latest available frame
                fidx_continous = t_elapsed*opts.fps
                fidx = int(np.floor(fidx_continous))
                if fidx == fidx_latest:
                    # algorithm is fast and has some idle time
                    wait_for_next = True
                else:
                    wait_for_next = False
                    if opts.dynamic_schedule:
                        if mean_rtf >= 1:
                            # when runtime < 1, it should always process every frame
                            fidx_remainder = fidx_continous - fidx
                            if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                                # wait till the next frame
                                wait_for_next = True

                if wait_for_next:
                    # sleep
                    continue

                if not processing:
                    t_start_frame = perf_counter()
                    frame_send.send((fidx, t_start_frame))
                    fidx_latest = fidx
                    processing = True

                # wait till query - forecast-rt-ub
                wait_time = t_unit - opts.forecast_rt_ub
                if det_res_recv.poll(wait_time):  # wait
                    # new result
                    result = det_res_recv.recv()
                    if isinstance(result, Exception):
                        raise result
                    result, t_send_frame, t_start_res = result
                    bboxes_t2, scores_t2, labels_t2, _ = \
                        parse_det_result(result, coco_mapping, n_class)
                    processing = False
                    t_det_end = perf_counter()
                    t_det_all.append(t_det_end - t_start_frame)
                    t_send_frame_all.append(t_send_frame)
                    t_recv_res_all.append(t_det_end - t_start_res)

                    # associate across frames
                    t_assoc_start = perf_counter()
                    if len(kf_x):
                        dt = fidx_latest - fidx_t2

                        kf_F = make_F(kf_F, dt)
                        kf_Q = make_Q(kf_Q, dt)
                        kf_x, kf_P = batch_kf_predict(kf_F, kf_x, kf_P, kf_Q)
                        bboxes_f = x2bbox(kf_x)
                    fidx_t2 = fidx_latest

                    n = len(bboxes_t2)
                    if n:
                        # put high-score detections first for better iou matching
                        score_argsort = np.argsort(scores_t2)[::-1]
                        bboxes_t2 = bboxes_t2[score_argsort]
                        scores_t2 = scores_t2[score_argsort]
                        labels_t2 = labels_t2[score_argsort]

                        ltrb2ltwh_(bboxes_t2)

                    updated = False
                    if len(kf_x):
                        order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                            bboxes_f, labels, tracks, tkidx,
                            bboxes_t2, labels_t2, opts.match_iou_th,
                            no_unmatched1=True,
                        )
                        if n_matched12:
                            kf_x = kf_x[order1]
                            kf_P = kf_P[order1]
                            kf_x, kf_P = batch_kf_update(
                                bbox2z(bboxes_t2[order2[:n_matched12]]),
                                kf_x, kf_P, kf_R,
                            )

                            kf_x_new = bbox2x(bboxes_t2[order2[n_matched12:]])
                            n_unmatched2 = len(bboxes_t2) - n_matched12
                            kf_P_new = kf_P_init.expand(n_unmatched2, -1, -1)

                            kf_x = torch.cat((kf_x, kf_x_new))
                            kf_P = torch.cat((kf_P, kf_P_new))
                            labels = labels_t2[order2]
                            scores = scores_t2[order2]
                            updated = True

                    if not updated:
                        # start from scratch
                        kf_x = bbox2x(bboxes_t2)
                        kf_P = kf_P_init.expand(len(bboxes_t2), -1, -1)
                        labels = labels_t2
                        scores = scores_t2
                        tracks = np.arange(tkidx, tkidx + n, dtype=np.uint32)
                        tkidx += n

                    t_assoc_end = perf_counter()
                    t_assoc_all.append(t_assoc_end - t_assoc_start)

                # apply forecasting for the current query
                t_forecast_start = perf_counter()
                query_pointer = fidx + opts.eta + 1

                if len(kf_x):
                    dt = (query_pointer - fidx_t2)

                    kf_x_np = kf_x[:, :, 0].numpy()
                    bboxes_t3 = kf_x_np[:n_matched12, :4] + dt*kf_x_np[:n_matched12, 4:]
                    if n_matched12 < len(kf_x):
                        bboxes_t3 = np.concatenate((bboxes_t3, kf_x_np[n_matched12:, :4]))

                    bboxes_t3, keep = extrap_clean_up(bboxes_t3, w_img, h_img, lt=True)
                    labels_t3 = labels[keep]
                    scores_t3 = scores[keep]
                    tracks_t3 = tracks[keep]
                else:
                    bboxes_t3 = np.empty((0, 4), dtype=np.float32)
                    scores_t3 = np.empty((0,), dtype=np.float32)
                    labels_t3 = np.empty((0,), dtype=np.int32)
                    tracks_t3 = np.empty((0,), dtype=np.int32)

                t_forecast_end = perf_counter()
                t_forecast_all.append(t_forecast_end - t_forecast_start)

                t3 = perf_counter()
                t_elapsed = t3 - t_start
                if t_elapsed >= t_total:
                    break

                if len(bboxes_t3):
                    ltwh2ltrb_(bboxes_t3)
                if fidx_t2 is not None:
                    timestamps.append(t_elapsed)
                    results_parsed.append((bboxes_t3, scores_t3, labels_t3, None, tracks_t3))
                    input_fidx.append(fidx_t2)

            out_path = join(opts.out_dir, seq + '.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump({
                    'results_parsed': results_parsed,
                    'timestamps': timestamps,
                    'input_fidx': input_fidx,
                }, open(out_path, 'wb'))

    # terminate the child process
    frame_send.send(None)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'n_total': n_total,
            't_det': t_det_all,
            't_send_frame': t_send_frame_all,
            't_recv_res': t_recv_res_all,
            't_assoc': t_assoc_all,
            't_forecast': t_forecast_all,
        }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print_stats(t_det_all, 'Runtime detection (ms)', cvt=s2ms)
    print_stats(t_send_frame_all, 'Runtime sending the frame (ms)', cvt=s2ms)
    print_stats(t_recv_res_all, 'Runtime receiving the result (ms)', cvt=s2ms)
    print_stats(t_assoc_all, 'Runtime association (ms)', cvt=s2ms)
    print_stats(t_forecast_all, 'Runtime forecasting (ms)', cvt=s2ms)
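# Both the streaming loop above and the simulation below run a batched
# constant-velocity Kalman filter with an 8-D state per track. The helpers
# `bbox2x`, `x2bbox`, `bbox2z`, `make_F`, and `make_Q` come from the repo;
# the sketches below only illustrate the assumed state layout and are not
# the original implementations.
import torch

def bbox2z_sketch(bboxes):
    # (n, 4) ltwh boxes -> (n, 4, 1) measurement column vectors
    return torch.from_numpy(bboxes).unsqueeze(2).float()

def bbox2x_sketch(bboxes):
    # (n, 4) ltwh boxes -> (n, 8, 1) states [l, t, w, h, vl, vt, vw, vh],
    # with velocities initialized to zero
    x = torch.zeros(len(bboxes), 8, 1)
    x[:, :4, 0] = torch.from_numpy(bboxes)
    return x

def x2bbox_sketch(x):
    # (n, 8, 1) states -> (n, 4) ltwh boxes
    return x[:, :4, 0].numpy()

def make_F_sketch(F, dt):
    # constant-velocity transition: position += dt * velocity, in place.
    # This is what makes the numpy shortcut in the forecasting step,
    # kf_x_np[:, :4] + dt*kf_x_np[:, 4:], equivalent to a predict-only pass.
    F[:4, 4:] = dt*torch.eye(4)
    return F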
def main():
    opts = parse_args()
    assert opts.forecast_before_assoc, "Not implemented"

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    shifts = 0

    given_tracks = opts.assoc == 'given'
    assert not given_tracks, "Not implemented"

    t_assoc = []
    t_forecast = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        # kf_F[3, 7] = 1
        kf_Q = torch.eye(8)
        kf_R = 10*torch.eye(4)
        kf_P_init = 100*torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

            results = pickle.load(open(join(opts.in_dir, seq + '.pkl'), 'rb'))
            # use raw results when possible in case we change the class subset during evaluation
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
            timestamps = results['timestamps']
            input_fidx = results['input_fidx']

            # t1 -> det1, t2 -> det2, interpolate at t3 (t3 is the current time)
            det_latest_p1 = 0   # latest detection index + 1
            det_t2 = None       # detection index at t2
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            if not given_tracks:
                tkidx = 0       # track starting index

            for ii, img in enumerate(frame_list):
                # pred, gt association by time
                t = (ii - opts.eta)/opts.fps
                while det_latest_p1 < len(timestamps) and timestamps[det_latest_p1] <= t:
                    det_latest_p1 += 1
                if det_latest_p1 == 0:
                    # no detection output
                    miss += 1
                    bboxes_t3, scores, labels, tracks = [], [], [], []
                else:
                    det_latest = det_latest_p1 - 1
                    ifidx = input_fidx[det_latest]
                    in_time += int(ii == ifidx)
                    shifts += ii - ifidx

                    if det_latest != det_t2:
                        # new detection; we can now throw away the old result (t1),
                        # which was kept only for forecasting purposes
                        if len(kf_x) and opts.forecast_before_assoc:
                            dt = ifidx - input_fidx[det_t2]
                            dt = int(dt)  # convert from numpy to a basic python type
                            w_img, h_img = img['width'], img['height']

                            kf_F = make_F(kf_F, dt)
                            kf_Q = make_Q(kf_Q, dt)
                            kf_x, kf_P = batch_kf_predict(kf_F, kf_x, kf_P, kf_Q)
                            bboxes_f = x2bbox(kf_x)

                        det_t2 = det_latest
                        if results_raw is None:
                            result_t2 = results_parsed[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = result_t2[:4]
                            if not given_tracks and len(result_t2) > 4:
                                tracks_t2 = np.asarray(result_t2[4])
                            else:
                                tracks_t2 = np.empty((0,), np.uint32)
                        else:
                            result_t2 = results_raw[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = \
                                parse_det_result(result_t2, coco_mapping, n_class)

                        t1 = perf_counter()
                        n = len(bboxes_t2)
                        if n:
                            if not given_tracks:
                                # put high-score detections first for better iou matching
                                score_argsort = np.argsort(scores_t2)[::-1]
                                bboxes_t2 = bboxes_t2[score_argsort]
                                scores_t2 = scores_t2[score_argsort]
                                labels_t2 = labels_t2[score_argsort]

                            ltrb2ltwh_(bboxes_t2)

                        if given_tracks:
                            raise NotImplementedError
                            order1, order2, n_matched12 = track_based_shuffle(
                                tracks, tracks_t2, no_unmatched1=True)
                            tracks = tracks[order2]
                        else:
                            updated = False
                            if len(kf_x):
                                order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                                    bboxes_f, labels, tracks, tkidx,
                                    bboxes_t2, labels_t2, opts.match_iou_th,
                                    no_unmatched1=True,
                                )
                                if n_matched12:
                                    kf_x = kf_x[order1]
                                    kf_P = kf_P[order1]
                                    kf_x, kf_P = batch_kf_update(
                                        bbox2z(bboxes_t2[order2[:n_matched12]]),
                                        kf_x, kf_P, kf_R,
                                    )

                                    kf_x_new = bbox2x(bboxes_t2[order2[n_matched12:]])
                                    n_unmatched2 = len(bboxes_t2) - n_matched12
                                    kf_P_new = kf_P_init.expand(n_unmatched2, -1, -1)

                                    kf_x = torch.cat((kf_x, kf_x_new))
                                    kf_P = torch.cat((kf_P, kf_P_new))
                                    labels = labels_t2[order2]
                                    scores = scores_t2[order2]
                                    updated = True

                            if not updated:
                                # start from scratch
                                kf_x = bbox2x(bboxes_t2)
                                kf_P = kf_P_init.expand(len(bboxes_t2), -1, -1)
                                labels = labels_t2
                                scores = scores_t2
                                if not given_tracks:
                                    tracks = np.arange(tkidx, tkidx + n, dtype=np.uint32)
                                    tkidx += n

                        t2 = perf_counter()
                        t_assoc.append(t2 - t1)

                    t3 = perf_counter()
                    if len(kf_x):
                        dt = ii - ifidx
                        w_img, h_img = img['width'], img['height']

                        # PyTorch small matrix multiplication is slow, use numpy instead
                        # kf_F = make_F(kf_F, dt)
                        # bboxes_t3 = x2bbox(batch_kf_predict_only(kf_F, kf_x))
                        kf_x_np = kf_x[:, :, 0].numpy()
                        bboxes_t3 = kf_x_np[:n_matched12, :4] + dt*kf_x_np[:n_matched12, 4:]
                        if n_matched12 < len(kf_x):
                            bboxes_t3 = np.concatenate((bboxes_t3, kf_x_np[n_matched12:, :4]))

                        bboxes_t3, keep = extrap_clean_up(bboxes_t3, w_img, h_img, lt=True)
                        # torch_keep = torch.from_numpy(keep)  # boolean mask
                        # kf_x = kf_x[torch_keep]
                        # kf_P = kf_P[torch_keep]
                        # labels = labels[keep]
                        # scores = scores[keep]
                        # tracks = tracks[keep]
                        labels_t3 = labels[keep]
                        scores_t3 = scores[keep]
                        tracks_t3 = tracks[keep]

                    t4 = perf_counter()
                    t_forecast.append(t4 - t3)

                n = len(bboxes_t3)
                for i in range(n):
                    result_dict = {
                        'image_id': img['id'],
                        'bbox': bboxes_t3[i],
                        'score': scores_t3[i],
                        'category_id': labels_t3[i],
                    }
                    results_ccf.append(result_dict)

                if vis_out:
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')

                    bboxes = bboxes_t3.copy()
                    if n:
                        ltwh2ltrb_(bboxes)
                    if opts.overwrite or not isfile(vis_path):
                        vis_track(
                            I, bboxes, tracks, labels,
                            class_names, None, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

    s2ms = lambda x: 1e3*x
    if len(t_assoc):
        print_stats(t_assoc, 'Runtime association (ms)', cvt=s2ms)
    if len(t_forecast):
        print_stats(t_forecast, 'Runtime forecasting (ms)', cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
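# `extrap_clean_up` bounds the forecast: extrapolated boxes are clipped to
# the image and degenerate ones are dropped, with `keep` reporting which
# tracks survive. A hypothetical sketch, assuming lt=True means (l, t, w, h)
# input/output and a guessed minimum box size; the actual repo helper may
# use different thresholds.
import numpy as np

def extrap_clean_up_sketch(bboxes, w_img, h_img, min_size=2, lt=False):
    bboxes = bboxes.copy()
    if lt:
        bboxes[:, 2:] += bboxes[:, :2]      # ltwh -> ltrb for clipping
    # clip to image bounds
    np.clip(bboxes[:, 0::2], 0, w_img, out=bboxes[:, 0::2])
    np.clip(bboxes[:, 1::2], 0, h_img, out=bboxes[:, 1::2])
    # keep boxes that retain a reasonable extent after clipping
    wh = bboxes[:, 2:] - bboxes[:, :2]
    keep = (wh >= min_size).all(axis=1)
    bboxes = bboxes[keep]
    if lt:
        bboxes[:, 2:] -= bboxes[:, :2]      # back to ltwh
    return bboxes, keep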