def detect(re_detect=True, save_path='my_test_relation_prediction.json',
           top_tree=10, overlap=0.3, iou_thr=0.3):
    dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)
    with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'r') as fin:
        param = json.load(fin)

    if re_detect:
        short_term_relations = model.predict(dataset, param)
        with open(short_term_predication_path, 'w+') as stp_out_f:
            stp_out_f.write(json.dumps(short_term_relations))
        print('Successfully saved short-term prediction to: ' + short_term_predication_path)
    else:
        with open(short_term_predication_path, 'r') as stp_in_f:
            short_term_relations = json.load(stp_in_f)

    # print('greedy relational association ...')
    print('original MHT association ...')
    video_relations = dict()
    for vid in tqdm(short_term_relations.keys()):
        # res = association.greedy_relational_association(short_term_relations[vid], param['seg_topk'])
        res = origin_mht_relational_association(short_term_relations[vid], param['seg_topk'],
                                                top_tree=top_tree, overlap=overlap, iou_thr=iou_thr)
        res = sorted(res, key=lambda r: r['score'], reverse=True)[:param['video_topk']]
        video_relations[vid] = res

    # save detection result
    with open(os.path.join(get_model_path(), save_path), 'w+') as fout:
        output = {
            'version': 'VERSION 1.0',
            'results': video_relations
        }
        json.dump(output, fout)
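# Example usage (illustrative, not part of the original file): with
# re_detect=False the cached short-term predictions are reused and only the
# MHT association stage is re-run. The keyword values mirror detect()'s own
# signature and the association hyper-parameters used elsewhere in this repo.
if __name__ == '__main__':
    detect(re_detect=False, save_path='my_test_relation_prediction.json',
           top_tree=20, overlap=0.3, iou_thr=0.3)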
def load_relation_feature():
    """Test loading precomputed relation features"""
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos', ['train', 'test'])
    extractor = feature.FeatureExtractor(dataset, prefetch_count=0)

    video_indices = dataset.get_index(split='train')
    for vid in video_indices:
        durations = set(rel_inst['duration']
                        for rel_inst in dataset.get_relation_insts(vid, no_traj=True))
        for duration in durations:
            segs = segment_video(*duration)
            for fstart, fend in segs:
                extractor.extract_feature(dataset, vid, fstart, fend, verbose=True)

    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        for fstart, fend in segs:
            extractor.extract_feature(dataset, vid, fstart, fend, verbose=True)
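# Note (illustrative sketch, not the baseline's actual implementation):
# segment_video is imported from the baseline utilities and is assumed to
# slide a fixed-length window over a frame range. The VidVRD baseline is
# commonly configured with 30-frame segments and a 15-frame stride, but
# verify against your copy of the code before relying on these numbers.
def _segment_video_sketch(fstart, fend, seg_len=30, stride=15):
    """Hypothetical stand-in returning the (fstart, fend) pairs expected above."""
    return [(i, i + seg_len) for i in range(fstart, fend - seg_len + 1, stride)]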
def train():
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos', ['train', 'test'])

    param = dict()
    param['model_name'] = 'baseline'
    param['rng_seed'] = 1701
    param['max_sampling_in_batch'] = 32
    param['batch_size'] = 64
    param['learning_rate'] = 0.001
    param['weight_decay'] = 0.0
    param['max_iter'] = 5000
    param['display_freq'] = 1
    param['save_freq'] = 5000
    param['epsilon'] = 1e-8
    param['pair_topk'] = 20
    param['seg_topk'] = 200
    print(param)

    model.train(dataset, param)
def train():
    dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)

    param = dict()
    param['model_name'] = 'baseline'
    param['rng_seed'] = 1701
    param['max_sampling_in_batch'] = 32
    param['batch_size'] = 64
    param['learning_rate'] = 0.001
    param['weight_decay'] = 0.0
    param['max_iter'] = 5000
    param['display_freq'] = 1
    param['save_freq'] = 5000
    param['epsilon'] = 1e-8
    param['pair_topk'] = 20
    param['seg_topk'] = 200
    print(param)

    model.train(dataset, param)
def load_object_trajectory_proposal():
    """Test loading precomputed object trajectory proposals"""
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos', ['train', 'test'])

    video_indices = dataset.get_index(split='train')
    for vid in video_indices:
        durations = set(rel_inst['duration']
                        for rel_inst in dataset.get_relation_insts(vid, no_traj=True))
        for duration in durations:
            segs = segment_video(*duration)
            for fstart, fend in segs:
                trajs = trajectory.object_trajectory_proposal(dataset, vid, fstart, fend,
                                                              gt=False, verbose=True)
                trajs = trajectory.object_trajectory_proposal(dataset, vid, fstart, fend,
                                                              gt=True, verbose=True)

    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        for fstart, fend in segs:
            trajs = trajectory.object_trajectory_proposal(dataset, vid, fstart, fend,
                                                          gt=False, verbose=True)
            trajs = trajectory.object_trajectory_proposal(dataset, vid, fstart, fend,
                                                          gt=True, verbose=True)
def detect():
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos', ['train', 'test'])
    with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'r') as fin:
        param = json.load(fin)
    short_term_relations = model.predict(dataset, param)

    # group short-term relations by video
    video_st_relations = defaultdict(list)
    for index, st_rel in short_term_relations.items():
        vid = index[0]
        video_st_relations[vid].append((index, st_rel))

    # video-level visual relation detection by relational association
    print('greedy relational association ...')
    video_relations = dict()
    for vid in tqdm(video_st_relations.keys()):
        video_relations[vid] = association.greedy_relational_association(
            dataset, video_st_relations[vid], max_traj_num_in_clip=100)

    # save detection result
    with open(os.path.join(get_model_path(), 'baseline_relation_prediction.json'), 'w') as fout:
        output = {'version': 'VERSION 1.0', 'results': video_relations}
        json.dump(output, fout)
def eval_short_term_relation():
    """Evaluate short-term relation prediction"""
    anno_rpath = 'baseline/vidvrd-dataset'
    video_rpath = 'baseline/vidvrd-dataset/videos'
    splits = ['train', 'test']
    st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'
    test_st_pred = '/home/daivd/Downloads/pad_result_24000_test_predicate_-1_pair_nms_0.4_rpn_nms_0.7_0.255_union.json'
    res_path = test_st_pred

    dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)
    with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'r') as fin:
        param = json.load(fin)

    # reuse cached predictions if present, otherwise run the model and cache
    if os.path.exists(res_path):
        with open(res_path, 'r') as fin:
            short_term_relations = json.load(fin)
    else:
        short_term_relations = model.predict(dataset, param)
        with open(res_path, 'w') as fout:
            json.dump(short_term_relations, fout)

    short_term_gt = dict()
    short_term_pred = dict()
    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        video_gts = dataset.get_relation_insts(vid)
        if 'results' in short_term_relations:
            video_preds = short_term_relations['results'][vid]
        else:
            video_preds = short_term_relations[vid]

        for fstart, fend in segs:
            vsig = get_segment_signature(vid, fstart, fend)

            # clip each ground-truth instance to the overlap with this segment
            segment_gts = []
            for r in video_gts:
                s = max(r['duration'][0], fstart)
                e = min(r['duration'][1], fend)
                if s < e:
                    sub_trac = r['sub_traj'][s - r['duration'][0]:e - r['duration'][0]]
                    obj_trac = r['obj_traj'][s - r['duration'][0]:e - r['duration'][0]]
                    segment_gts.append({
                        'triplet': r['triplet'],
                        'subject_tid': r['subject_tid'],
                        'object_tid': r['object_tid'],
                        'duration': [s, e],
                        'sub_traj': sub_trac,
                        'obj_traj': obj_trac
                    })
            short_term_gt[vsig] = segment_gts

            # keep only predictions fully contained in this segment
            segment_preds = []
            for r in video_preds:
                if fstart <= r['duration'][0] and r['duration'][1] <= fend:
                    s = max(r['duration'][0], fstart)
                    e = min(r['duration'][1], fend)
                    sub_trac = r['sub_traj'][s - r['duration'][0]:e - r['duration'][0]]
                    obj_trac = r['obj_traj'][s - r['duration'][0]:e - r['duration'][0]]
                    segment_preds.append({
                        'triplet': r['triplet'],
                        'score': r['score'],
                        'duration': [s, e],
                        'sub_traj': sub_trac,
                        'obj_traj': obj_trac
                    })
            short_term_pred[vsig] = segment_preds

    # every ground-truth segment needs a (possibly empty) prediction entry
    for each_vsig in short_term_gt:
        if each_vsig not in short_term_pred:
            short_term_pred[each_vsig] = []

    mean_ap, rec_at_n, mprec_at_n = eval_visual_relation(short_term_gt, short_term_pred)
    print('detection mean AP (used in challenge): {}'.format(mean_ap))
    print('detection recall@50: {}'.format(rec_at_n[50]))
    print('detection recall@100: {}'.format(rec_at_n[100]))
    print('tagging precision@1: {}'.format(mprec_at_n[1]))
    print('tagging precision@5: {}'.format(mprec_at_n[5]))
    print('tagging precision@10: {}'.format(mprec_at_n[10]))
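# The segment clipping above repeats the same slicing arithmetic for ground
# truth and predictions: a trajectory stored relative to its own duration is
# cut down to the frames that fall inside [fstart, fend). A standalone helper
# capturing just that step (names are hypothetical, for illustration only):
def _clip_traj_to_segment(traj, duration, fstart, fend):
    """Return (clipped_traj, [s, e]) for the overlap of duration with
    [fstart, fend), or None if the two ranges do not overlap."""
    s = max(duration[0], fstart)
    e = min(duration[1], fend)
    if s >= e:
        return None
    return traj[s - duration[0]:e - duration[0]], [s, e]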
        if len(zs_gt_relations) > 0:
            groundtruth[vid] = zs_gt_relations
            zs_prediction[vid] = []
            for r in prediction[vid]:
                if tuple(r['triplet']) in zeroshot_triplets:
                    zs_prediction[vid].append(r)
    mean_ap, rec_at_n, mprec_at_n = eval_visual_relation(groundtruth, zs_prediction)


if __name__ == '__main__':
    anno_rpath = 'vidvrd-dataset'
    video_rpath = ''
    splits = ['test']
    dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)

    top_tree = 20
    overlap = 0.3
    iou_thr = 0.8
    test_vid = 'ILSVRC2015_train_00066007'

    config = '{}_{}_{}'.format(top_tree, overlap, iou_thr)
    prediction_out = 'test_out_{}.json'.format(config)

    if os.path.exists(prediction_out):
        print('Loading prediction from {}'.format(prediction_out))
        with open(prediction_out, 'r') as fin:
            result = json.load(fin)
                        help='the dataset name for evaluation')
    parser.add_argument('split', type=str,
                        help='the split name for evaluation')
    parser.add_argument('task', choices=['object', 'action', 'relation'],
                        help='which task to evaluate')
    parser.add_argument('prediction', type=str,
                        help='corresponding prediction JSON file')
    args = parser.parse_args()

    if args.dataset == 'vidvrd':
        if args.task == 'relation':
            # load train set for zero-shot evaluation
            dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                             ['train', args.split])
        else:
            dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                             [args.split])
    elif args.dataset == 'vidor':
        if args.task == 'relation':
            # load train set for zero-shot evaluation
            dataset = VidOR('../vidor-dataset/annotation', '../vidor-dataset/video',
                            ['training', args.split], low_memory=True)
        else:
            dataset = VidOR('../vidor-dataset/annotation', '../vidor-dataset/video',
                            [args.split], low_memory=True)
    else:
        raise Exception('Unknown dataset {}'.format(args.dataset))
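# Why the train split is loaded for the relation task: zero-shot evaluation
# scores only triplets that occur in the test split but never in training.
# A sketch of the usual construction (hedged assumption: get_triplets(split)
# follows the official vidvrd-helper API and returns a set of triplets):
def _zeroshot_triplets_sketch(dataset, test_split):
    return dataset.get_triplets(test_split) - dataset.get_triplets('train')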
anno_rpath = 'baseline/vidvrd-dataset'
video_rpath = 'baseline/vidvrd-dataset/videos'
splits = ['train', 'test']
st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'

top_tree = 20
overlap = 0.2
iou_thr = 0.2
test_result_name = 'mht_test_relation_prediction_v4_{}_{}_{}.json'.format(top_tree, overlap, iou_thr)
prediction = os.path.join('baseline/vidvrd-dataset/vidvrd-baseline-output/models', test_result_name)
short = False

dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)

# load either the segment-level (short-term) or the video-level prediction,
# so that pred is always assigned before evaluation
if short:
    print('Loading prediction from {}'.format(st_prediction))
    with open(st_prediction, 'r') as fin:
        pred = json.load(fin)
else:
    print('Loading prediction from {}'.format(prediction))
    with open(prediction, 'r') as fin:
        pred = json.load(fin)

print('Number of videos in prediction: {}'.format(len(pred)))
# modify the split ['train', 'test']
evaluate_relation(dataset, 'test', pred, segment=short)
anno_rpath = 'baseline/vidvrd-dataset'
video_rpath = 'baseline/vidvrd-dataset/videos'
splits = ['train', 'test']
prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/models/baseline_relation_prediction.json'
st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'

dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)
video_indices = dataset.get_index(split='test')

with open(st_prediction, 'r') as st_pre_f:
    pred_segs = json.load(st_pre_f)

short_term_gt = dict()
short_term_pred = dict()
for vid in video_indices:
    gt = dataset.get_relation_insts(vid)
    pred = pred_segs[vid]
    gt_segs = separate_vid_2_seg(gt)
    for each_gt_seg in gt_segs: