def main():
    """Pair per-sequence detector outputs with annotated frames and evaluate.

    For each sequence listed in the annotation file, the pickled output in
    ``opts.result_dir`` is loaded and every frame ``ii`` is associated with
    the latest output whose timestamp is not later than
    ``(ii - opts.eta) / opts.fps``.  Frames that have no such output are
    counted in ``miss`` and contribute no detections.

    The associated detections are converted to COCO result dicts and written
    to ``results_ccf.pkl``; the association counters go to
    ``eval_assoc.pkl``.  Unless ``opts.no_eval`` is set, ``eval_ccf`` is run
    and its summary is saved (and once more with ``iou_type='segm'`` when
    ``opts.eval_mask`` is set).  Existing output files are only replaced when
    ``opts.overwrite`` is set.
    """
    opts = parse_args()

    out_dir = mkdir2(opts.out_dir) if opts.out_dir else opts.result_dir
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    if opts.no_class_mapping:
        coco_mapping = None
    else:
        coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0     # frames whose paired output was computed from that very frame
    miss = 0        # frames with no output available yet
    mismatch = 0    # accumulated (frame index - input frame index of the output)

    print('Pairing the output with the ground truth')

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # NOTE: pickle executes arbitrary code on load; only use trusted
        # result files here.
        with open(join(opts.result_dir, seq + '.pkl'), 'rb') as f:
            results = pickle.load(f)

        # use raw results when possible in case we change class subset
        # during evaluation
        results_raw = None if opts.use_parsed else results.get('results_raw', None)
        use_parsed = results_raw is None
        if use_parsed:
            results_parsed = results['results_parsed']
        timestamps = results['timestamps']
        input_fidx = results['input_fidx']

        tidx_p1 = 0  # index of the latest usable output, plus one
        for ii, img in enumerate(frame_list):
            # pred, gt association by time
            t = (ii - opts.eta) / opts.fps
            while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
                tidx_p1 += 1

            if tidx_p1 == 0:
                # no output
                miss += 1
                bboxes, scores, labels = [], [], []
                masks, tracks = None, None
            else:
                tidx = tidx_p1 - 1

                ifidx = input_fidx[tidx]
                in_time += int(ii == ifidx)
                mismatch += ii - ifidx

                if use_parsed:
                    result = results_parsed[tidx]
                    bboxes, scores, labels, masks = result[:4]
                    # a fifth entry, when present, carries the track ids
                    tracks = result[4] if len(result) > 4 else None
                else:
                    result = results_raw[tidx]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
                    tracks = None

            if vis_out:
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')
                if opts.overwrite or not isfile(vis_path):
                    # read the frame only when it is actually going to be drawn
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    if tracks is None:
                        vis_det(
                            I, bboxes, labels,
                            class_names, masks, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )
                    else:
                        vis_track(
                            I, bboxes, tracks, labels,
                            class_names, masks, scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

            # convert to coco fmt
            n = len(bboxes)
            if n:
                bboxes_ltwh = ltrb2ltwh(bboxes)

            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes_ltwh[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                if masks is not None:
                    result_dict['segmentation'] = masks[i]

                results_ccf.append(result_dict)

    out_path = join(out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_ccf, f)

    out_path = join(out_dir, 'eval_assoc.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump({
                'miss': miss,
                'in_time': in_time,
                'mismatch': mismatch,
            }, f)

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            with open(out_path, 'wb') as f:
                pickle.dump(eval_summary, f)
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                with open(out_path, 'wb') as f:
                    pickle.dump(eval_summary, f)

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
masks.append(ann['segmentation']) else: masks = None labels.append(ann['category_id']) if show_track: tracks.append(ann['track']) aidx += 1 if bboxes: bboxes = np.array(bboxes) bboxes[:, 2:] += bboxes[:, :2] out_path = join(out_dir_seq, img_name[:-3] + 'jpg') if show_track: vis_track(I, bboxes, tracks, labels, class_names, masks, out_scale=out_scale, out_file=out_path) else: vis_det(I, bboxes, labels, class_names, masks, out_scale=out_scale, out_file=out_path)
def main():
    """Merge per-sequence timed results into one COCO-format result list.

    Each ``<seq>.pkl`` in ``opts.result_dir`` is unpickled into
    ``(results, timestamps, input_fidx, _)``.  Frame ``i`` is paired with the
    latest result whose timestamp is not later than ``(i + 1) / opts.fps``;
    frames without any such result are counted in ``miss`` and skipped.

    Outputs written to ``opts.result_dir`` (only when missing, or when
    ``opts.overwrite`` is set): ``results_ccf.pkl``, ``time_extra.txt``
    (``miss``, ``in_time``, ``shifts``) and, unless ``opts.no_eval`` is set,
    ``eval_summary.pkl``.  When ``opts.vis_dir`` is given, one visualization
    per frame is written there as well.
    """
    opts = parse_args()

    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']
    class_names = [c['name'] for c in db.dataset['categories']]

    print('Merging results')

    results_ccf = []
    in_time = 0   # frames whose paired result was computed from that very frame
    miss = 0      # frames with no result available yet
    shifts = 0    # accumulated (frame index - input frame index of the result)

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # NOTE: pickle executes arbitrary code on load; only use trusted
        # result files here.
        with open(join(opts.result_dir, seq + '.pkl'), 'rb') as f:
            results, timestamps, input_fidx, _ = pickle.load(f)

        tidx_p1 = 0  # index of the latest usable result, plus one
        for i, img in enumerate(frame_list):
            # Decide up front whether this frame's visualization is written,
            # so the image is only read when it is needed and an existing
            # file is never replaced unless --overwrite is given.
            write_vis = False
            if vis_out:
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')
                write_vis = opts.overwrite or not isfile(vis_path)
                if write_vis:
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = mmcv.imread(img_path)

            # pred, gt association by time
            t = (i + 1) / opts.fps
            while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
                tidx_p1 += 1

            if tidx_p1 == 0:
                # no output: emit the plain (optionally rescaled) frame
                miss += 1
                if write_vis:
                    if opts.vis_scale != 1:
                        I = mmcv.imrescale(I, opts.vis_scale, interpolation='bilinear')
                    mmcv.imwrite(I, vis_path)
                continue

            tidx = tidx_p1 - 1
            result = results[tidx]
            ifidx = input_fidx[tidx]
            in_time += int(i == ifidx)
            shifts += i - ifidx

            bboxes = result['bboxes']
            scores = result['scores']
            labels = result['labels']
            masks = result['masks'] if 'masks' in result else None
            tracks = result['tracks'] if 'tracks' in result else None

            if write_vis:
                if tracks is None:
                    vis_det(
                        I, bboxes, labels,
                        class_names, masks, scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path,
                        score_th=0,
                    )
                else:
                    vis_track(
                        I, bboxes, tracks, labels,
                        class_names, masks, scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path,
                        score_th=0,
                    )

            # convert to coco fmt: (left, top, right, bottom) ->
            # (left, top, width, height), on a copy so the loaded result
            # is left untouched
            bboxes_ltwh = bboxes.copy()
            if len(bboxes_ltwh):
                bboxes_ltwh[:, 2:] -= bboxes_ltwh[:, :2]
                bboxes_ltwh = bboxes_ltwh.tolist()

            for j in range(len(bboxes_ltwh)):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes_ltwh[j],
                    'score': scores[j],
                    'category_id': labels[j],
                }
                if masks is not None:
                    result_dict['segmentation'] = masks[j]

                results_ccf.append(result_dict)

    out_path = join(opts.result_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_ccf, f)

    out_path = join(opts.result_dir, 'time_extra.txt')
    if opts.overwrite or not isfile(out_path):
        np.savetxt(out_path, [miss, in_time, shifts], fmt='%d')

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.result_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            with open(out_path, 'wb') as f:
                pickle.dump(eval_summary, f)

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
def main():
    """Fuse cached results of several methods with a ``BboxFilterEx`` tracker.

    Each entry of ``caches`` is a pickled COCO-format result list and the
    matching entry of ``rtfs`` is the number of frames by which that method's
    result is delayed: at frame ``ii`` the result computed on frame
    ``ii - rtfs[i]`` becomes available.  For every frame, the available
    results are fed oldest-first into the filter and ``predict(ii)`` gives
    the boxes reported for the current frame.

    The fused detections are written to ``results_ccf.pkl`` in
    ``opts.out_dir`` and, unless ``opts.no_eval`` is set, evaluated with
    ``eval_ccf``.  Existing files are only replaced when ``opts.overwrite``
    is set.
    """
    opts = parse_args()

    # caches = [
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/retina50_s0.2/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.5/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.75/val/results_ccf.pkl'),
    # ]
    # # should be sorted
    # rtfs = [1, 2, 3]

    # caches = [
    #     join(opts.exp_dir, 'ArgoVerse1.1/output/mrcnn50_nm_th0_s0.75/val/results_ccf.pkl'),
    #     join(opts.exp_dir, 'ArgoVerse1.1/output/mrcnn50_nm_th0_s1.0/val/results_ccf.pkl'),
    # ]
    # # should be sorted
    # rtfs = [3, 5]

    caches = [
        join(opts.exp_dir, 'ArgoVerse1.1-c3-eta0/output/mrcnn50_nm_s0.75/val/results_ccf.pkl'),
        join(opts.exp_dir, 'ArgoVerse1.1/output/cmrcnn101_nm_s1.0/val/results_ccf.pkl'),
    ]
    # should be sorted
    rtfs = [3, 5]

    n_method = len(caches)
    max_history = max(rtfs)
    min_rtf = min(rtfs)

    # NOTE: pickle executes arbitrary code on load; only use trusted caches.
    cache_ccfs = []
    for path in caches:
        with open(path, 'rb') as f:
            cache_ccfs.append(pickle.load(f))
    # running read position in each cached result list, carried across
    # frames and sequences
    cache_end_idx = n_method*[0]

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # NOTE(review): the size is taken from image id 0 for every sequence;
        # presumably all frames share one resolution -- confirm.
        w_img, h_img = db.imgs[0]['width'], db.imgs[0]['height']
        bf = BboxFilterEx(
            w_img, h_img,
            forecast=opts.forecast,
            forecast_before_assoc=opts.forecast_before_assoc,
        )

        # backward in time
        # cur, cur - 1, ..., cur - max_history
        result_buffer = (max_history + 1)*[None]
        # True where the buffered result came from the first method in `caches`
        result_buffer_marker = (max_history + 1)*[None]

        for ii, img in enumerate(frame_list):
            # fetch results
            for i in range(n_method):
                ifidx = ii - rtfs[i]
                if ifidx < 0:
                    # rtfs is sorted, so the remaining methods are not ready either
                    break
                cache_end_idx[i], bboxes, scores, labels = \
                    result_from_ccf(cache_ccfs[i], frame_list[ifidx]['id'], cache_end_idx[i], mask=False)
                result_buffer[rtfs[i]] = (bboxes, scores, labels)
                result_buffer_marker[rtfs[i]] = i == 0

            if ii < min_rtf:
                # no method has finished yet
                bboxes, scores, labels, tracks = [], [], [], []
            else:
                # oldest buffered result
                s = max_history
                while s >= 0 and result_buffer[s] is None:
                    s -= 1
                # if first result is one step ahead
                t = ii - s
                birth_tracks = True
                kill_tracks = True
                if s == max_history:
                    # this result is about to drop out of the buffer, so it is
                    # committed to the persistent filter before copying
                    bf.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)
                    bf_this = deepcopy(bf)
                else:
                    bf_this = deepcopy(bf)
                    bf_this.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)

                while 1:
                    # find next non-empty result
                    s -= 1
                    while s >= 0 and result_buffer[s] is None:
                        s -= 1
                    if s < 0:
                        break
                    if result_buffer_marker[s]:
                        birth_tracks = opts.fast_birth_tracks
                        kill_tracks = opts.fast_kill_tracks
                    else:
                        birth_tracks = True
                        kill_tracks = True

                    t = ii - s
                    bf_this.update(t, *result_buffer[s], None, birth_tracks, kill_tracks)

                bboxes, scores, labels, _, tracks = \
                    bf_this.predict(ii)

            # shift result buffer
            result_buffer.pop(max_history)
            result_buffer.insert(0, None)
            result_buffer_marker.pop(max_history)
            result_buffer_marker.insert(0, None)

            n = len(bboxes)
            for i in range(n):
                result_dict = {
                    'image_id': img['id'],
                    'bbox': bboxes[i],
                    'score': scores[i],
                    'category_id': labels[i],
                }
                results_ccf.append(result_dict)

            if vis_out:
                vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')
                if opts.overwrite or not isfile(vis_path):
                    # read and convert only when the frame is actually drawn
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    bboxes_ltrb = ltwh2ltrb(bboxes) if n else []
                    vis_track(
                        I, bboxes_ltrb, tracks, labels,
                        class_names, None, scores,
                        out_scale=opts.vis_scale,
                        out_file=vis_path
                    )

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_ccf, f)

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            with open(out_path, 'wb') as f:
                pickle.dump(eval_summary, f)
        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                with open(out_path, 'wb') as f:
                    pickle.dump(eval_summary, f)

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
def main():
    """Forecast detections to the current frame with a batched Kalman filter.

    For each sequence, the pickled detector output in ``opts.in_dir`` is
    replayed: frame ``ii`` sees the latest detection whose timestamp is not
    later than ``(ii - opts.eta) / opts.fps``.  Whenever a new detection
    arrives, the filter state is predicted forward to the detection's input
    frame, associated with the new boxes by IoU (``iou_assoc``), updated for
    matched boxes and extended with unmatched ones.  For every frame the
    matched tracks are then linearly extrapolated by ``ii - ifidx`` frames,
    cleaned up by ``extrap_clean_up`` and appended as COCO result dicts.

    Results go to ``results_ccf.pkl`` in ``opts.out_dir`` and, unless
    ``opts.no_eval`` is set, are evaluated with ``eval_ccf``.  Existing files
    are only replaced when ``opts.overwrite`` is set.  Only
    ``opts.forecast_before_assoc`` with IoU association is implemented.
    """
    opts = parse_args()
    assert opts.forecast_before_assoc, "Not implemented"

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    shifts = 0

    given_tracks = opts.assoc == 'given'
    assert not given_tracks, "Not implemented"

    # per-call wall-clock durations, in seconds
    t_assoc = []
    t_forecast = []

    with torch.no_grad():
        # 8-dim state, 4-dim measurement; make_F / make_Q refresh F and Q for
        # a given frame gap
        kf_F = torch.eye(8)
        # kf_F[3, 7] = 1
        kf_Q = torch.eye(8)
        kf_R = 10 * torch.eye(4)
        kf_P_init = 100 * torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

            # NOTE: pickle executes arbitrary code on load; only use trusted
            # result files here.
            with open(join(opts.in_dir, seq + '.pkl'), 'rb') as f:
                results = pickle.load(f)
            # use raw results when possible in case we change class subset
            # during evaluation
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
            timestamps = results['timestamps']
            input_fidx = results['input_fidx']

            # t1 -> det1, t2 -> det2, interpolate at t3 (t3 is the current time)
            det_latest_p1 = 0           # latest detection index + 1
            det_t2 = None               # detection index at t2
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            if not given_tracks:
                tkidx = 0               # track starting index

            for ii, img in enumerate(frame_list):
                # pred, gt association by time
                t = (ii - opts.eta) / opts.fps
                while (det_latest_p1 < len(timestamps)
                       and timestamps[det_latest_p1] <= t):
                    det_latest_p1 += 1

                if det_latest_p1 == 0:
                    # no detection output
                    miss += 1
                    bboxes_t3, scores_t3, labels_t3, tracks_t3 = [], [], [], []
                else:
                    det_latest = det_latest_p1 - 1
                    ifidx = input_fidx[det_latest]
                    in_time += int(ii == ifidx)
                    shifts += ii - ifidx

                    if det_latest != det_t2:
                        # new detection
                        # we can now throw away old result (t1)
                        # the old one is kept for forecasting purpose

                        if len(kf_x) and opts.forecast_before_assoc:
                            # advance the existing tracks to the input frame
                            # of the new detection before matching
                            dt = ifidx - input_fidx[det_t2]
                            dt = int(dt)  # convert from numpy to basic python format

                            kf_F = make_F(kf_F, dt)
                            kf_Q = make_Q(kf_Q, dt)
                            kf_x, kf_P = batch_kf_predict(
                                kf_F, kf_x, kf_P, kf_Q)
                            bboxes_f = x2bbox(kf_x)

                        det_t2 = det_latest
                        if results_raw is None:
                            result_t2 = results_parsed[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = result_t2[:4]
                            if not given_tracks and len(result_t2) > 4:
                                tracks_t2 = np.asarray(result_t2[4])
                            else:
                                tracks_t2 = np.empty((0, ), np.uint32)
                        else:
                            result_t2 = results_raw[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = \
                                parse_det_result(result_t2, coco_mapping, n_class)

                        t1 = perf_counter()
                        n = len(bboxes_t2)
                        if n:
                            if not given_tracks:
                                # put high scores det first for better iou matching
                                score_argsort = np.argsort(scores_t2)[::-1]
                                bboxes_t2 = bboxes_t2[score_argsort]
                                scores_t2 = scores_t2[score_argsort]
                                labels_t2 = labels_t2[score_argsort]

                            ltrb2ltwh_(bboxes_t2)

                            if given_tracks:
                                # Never taken: given_tracks is asserted False
                                # above.  The statements after the raise are
                                # unreachable and kept only as a sketch.
                                raise NotImplementedError
                                order1, order2, n_matched12 = track_based_shuffle(
                                    tracks, tracks_t2, no_unmatched1=True)
                                tracks = tracks[order2]
                            else:
                                updated = False
                                if len(kf_x):
                                    order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                                        bboxes_f, labels, tracks, tkidx,
                                        bboxes_t2, labels_t2, opts.match_iou_th,
                                        no_unmatched1=True,
                                    )

                                    if n_matched12:
                                        # matched tracks first, followed by
                                        # fresh states for unmatched boxes
                                        kf_x = kf_x[order1]
                                        kf_P = kf_P[order1]
                                        kf_x, kf_P = batch_kf_update(
                                            bbox2z(bboxes_t2[order2[:n_matched12]]),
                                            kf_x,
                                            kf_P,
                                            kf_R,
                                        )

                                        kf_x_new = bbox2x(
                                            bboxes_t2[order2[n_matched12:]])
                                        n_unmatched2 = len(bboxes_t2) - n_matched12
                                        kf_P_new = kf_P_init.expand(
                                            n_unmatched2, -1, -1)
                                        kf_x = torch.cat((kf_x, kf_x_new))
                                        kf_P = torch.cat((kf_P, kf_P_new))
                                        labels = labels_t2[order2]
                                        scores = scores_t2[order2]
                                        updated = True

                                if not updated:
                                    # start from scratch
                                    kf_x = bbox2x(bboxes_t2)
                                    kf_P = kf_P_init.expand(
                                        len(bboxes_t2), -1, -1)
                                    labels = labels_t2
                                    scores = scores_t2
                                    if not given_tracks:
                                        tracks = np.arange(
                                            tkidx, tkidx + n, dtype=np.uint32)
                                        tkidx += n

                        t2 = perf_counter()
                        t_assoc.append(t2 - t1)

                    t3 = perf_counter()
                    if len(kf_x):
                        dt = ii - ifidx
                        w_img, h_img = img['width'], img['height']

                        # PyTorch small matrix multiplication is slow
                        # use numpy instead
                        kf_x_np = kf_x[:, :, 0].numpy()
                        # linear extrapolation for matched tracks only; the
                        # rest are reported at their current state
                        bboxes_t3 = (kf_x_np[:n_matched12, :4]
                                     + dt * kf_x_np[:n_matched12, 4:])
                        if n_matched12 < len(kf_x):
                            bboxes_t3 = np.concatenate(
                                (bboxes_t3, kf_x_np[n_matched12:, :4]))

                        bboxes_t3, keep = extrap_clean_up(
                            bboxes_t3, w_img, h_img, lt=True)
                        # filter copies only, so the tracker state keeps
                        # every track
                        labels_t3 = labels[keep]
                        scores_t3 = scores[keep]
                        tracks_t3 = tracks[keep]
                    else:
                        # Nothing is tracked yet: report no boxes instead of
                        # reusing values left over from an earlier frame or
                        # sequence.
                        bboxes_t3, scores_t3, labels_t3, tracks_t3 = [], [], [], []

                    t4 = perf_counter()
                    t_forecast.append(t4 - t3)

                n = len(bboxes_t3)
                for i in range(n):
                    result_dict = {
                        'image_id': img['id'],
                        'bbox': bboxes_t3[i],
                        'score': scores_t3[i],
                        'category_id': labels_t3[i],
                    }
                    results_ccf.append(result_dict)

                if vis_out:
                    vis_path = join(opts.vis_dir, seq, img['name'][:-3] + 'jpg')
                    if opts.overwrite or not isfile(vis_path):
                        # read and convert only when the frame is actually drawn
                        img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                        I = imread(img_path)

                        bboxes = bboxes_t3.copy()
                        if n:
                            ltwh2ltrb_(bboxes)
                        # use the filtered arrays so that every box lines up
                        # with its own track id, label and score
                        vis_track(
                            I, bboxes, tracks_t3, labels_t3,
                            class_names, None, scores_t3,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

    s2ms = lambda x: 1e3 * x
    if len(t_assoc):
        print_stats(t_assoc, "RT association (ms)", cvt=s2ms)
    if len(t_forecast):
        print_stats(t_forecast, "RT forecasting (ms)", cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_ccf, f)

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            with open(out_path, 'wb') as f:
                pickle.dump(eval_summary, f)

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')