def main(ground_truth_filename, prediction_filename,
         subset='validation', tiou_thr=0.5,
         verbose=True, check_status=True):
    # Pass check_status through instead of hardcoding True, so the
    # parameter actually takes effect.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thr=tiou_thr,
                                   verbose=verbose, check_status=check_status)
    anet_detection.evaluate()

def main(ground_truth_filename, prediction_filename,
         subset='test', tiou_thresholds=np.linspace(0.1, 0.9, 9),
         verbose=True, check_status=True):
    # check_status was hardcoded to True; forward the parameter instead.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thresholds=tiou_thresholds,
                                   verbose=verbose, check_status=check_status)
    anet_detection.evaluate()

def main(ground_truth_filename, prediction_filename,
         subset='validation', tiou_thresholds=np.linspace(0.5, 0.95, 10),
         verbose=True, check_status=True):
    # check_status was hardcoded to True; forward the parameter instead.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thresholds=tiou_thresholds,
                                   verbose=verbose, check_status=check_status)
    anet_detection.evaluate()

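# A minimal usage sketch for the ActivityNet-style entry point above. The
# file names are placeholders borrowed from later snippets in this section,
# not paths taken from this function's own repository.
if __name__ == '__main__':
    main('activity_net.v1-3.min.json',   # ground-truth annotations
         'prediction.json',              # detector output, ActivityNet JSON format
         subset='validation',
         tiou_thresholds=np.linspace(0.5, 0.95, 10))
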
def main(ground_truth_filename, prediction_filename,
         subset='testing', tiou_thresholds=np.linspace(0.5, 0.95, 10),
         verbose=False, check_status=True):
    # The original hardcoded check_status=False, silently ignoring the
    # parameter; forward it so the signature is honest.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thresholds=tiou_thresholds,
                                   verbose=verbose, check_status=check_status)
    anet_detection.evaluate()
    return anet_detection.mAP.mean()

def evaluation_detection_testset():
    app = ANETdetection(
        ground_truth_filename="./activity_net_test.v1-3.min.json",
        prediction_filename="./output/result_detect_cuhk_100_t1.json",
        subset='test', verbose=True, check_status=False)
    app.evaluate()

def evaluation_detection(args, eval_file):
    ground_truth_filename = './Evaluation/data/thumos14.json'
    anet_detection = ANETdetection(ground_truth_filename, eval_file,
                                   tiou_thresholds=np.linspace(0.1, 0.9, 9),
                                   subset='test', verbose=True,
                                   check_status=False)
    anet_detection.evaluate()
    # Per-class AP is available as anet_detection.ap if needed.
    mAP = anet_detection.mAP
    return mAP

def getscore(ground_truth_filename, prediction_filename, tiou_thr=0.5,
             subset='validation', verbose=True, check_status=True):
    # check_status was hardcoded to True; forward the parameter instead.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thr=tiou_thr,
                                   verbose=verbose, check_status=check_status)
    ap = anet_detection.evaluate()
    return ap

def main(ground_truth_filename, prediction_filename,
         subset='validation', tiou_thresholds=np.linspace(0.5, 0.95, 10),
         verbose=True, check_status=True):
    # The original hardcoded check_status=False, ignoring the parameter;
    # forward it instead.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thresholds=tiou_thresholds,
                                   verbose=verbose, check_status=check_status)
    tt = time.time()
    anet_detection.evaluate()
    print("It takes {}s".format(time.time() - tt))

def main(ground_truth_filename, prediction_filename,
         subset='test', tiou_thresholds='THUMOS14',
         verbose=True, check_status=True):
    # Resolve the named threshold set via an external lookup table; the
    # status check is deliberately enabled only for ActivityNet.
    tiou_thresholds = tiou_thre_dict[tiou_thresholds]
    anet_detection = ANETdetection(
        ground_truth_filename, prediction_filename,
        subset=subset, tiou_thresholds=tiou_thresholds, verbose=verbose,
        check_status=(config.DATASET_NAME == 'ActivityNet12'))
    anet_detection.evaluate()

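# `tiou_thre_dict` and `config` are defined elsewhere in that repository.
# A hypothetical sketch of the lookup, mirroring the conventions used by the
# other snippets in this section (THUMOS14 scored at tIoU 0.1-0.9,
# ActivityNet at 0.5-0.95) -- not the repository's actual definition:
tiou_thre_dict = {
    'THUMOS14': np.linspace(0.1, 0.9, 9),
    'ActivityNet12': np.linspace(0.5, 0.95, 10),
}
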
def evaluation_detection(opt):
    app = ANETdetection(
        ground_truth_filename="./Evaluation/data/activity_net_1_3_new.json",
        prediction_filename=os.path.join(opt['output'],
                                         "result_detect_cuhk_100_t1.json"),
        subset='validation', verbose=True, check_status=False)
    app.evaluate()
    parent_path, run_id = os.path.split(os.path.normpath(opt['output']))
    mAP_at_tIoU = [f'mAP@{t:.2f} {mAP*100:.3f}'
                   for t, mAP in zip(app.tiou_thresholds, app.mAP)]
    results = (f'[{run_id}|Detection] average-mAP {app.average_mAP*100:.3f} '
               f'{" ".join(mAP_at_tIoU)}')
    print(results)
    with open(os.path.join(parent_path, 'results.txt'), 'a') as fobj:
        fobj.write(f'{results}\n')

def main(ground_truth_filename, prediction_filename,
         subset='validation', tiou_thresholds=np.linspace(0.5, 0.95, 10),
         verbose=True, check_status=True):
    # check_status was hardcoded; forward the parameter instead.
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset=subset, tiou_thresholds=tiou_thresholds,
                                   verbose=verbose, check_status=check_status)
    res = anet_detection.evaluate()
    return res

def test(itr, dataset, args, model, logger, device):
    model.eval()
    done = False
    instance_logits_stack = []
    element_logits_stack = []
    labels_stack = []
    print("TESTING")
    while not done:
        features, labels, done = dataset.load_data(is_training=False)
        features = torch.from_numpy(features).float().to(device)
        features = features.unsqueeze(0)
        _, element_logits = model(features)
        element_logits = element_logits.squeeze(0)
        # Video-level class scores: softmax over the mean of the top-k
        # per-snippet logits for each class.
        tmp = (F.softmax(
            torch.mean(
                torch.topk(element_logits,
                           k=int(np.ceil(len(features) / args.topk)),
                           dim=0)[0],
                dim=0),
            dim=0).cpu().data.numpy())
        element_logits = element_logits.cpu().data.numpy()
        instance_logits_stack.append(tmp)
        element_logits_stack.append(element_logits)
        labels_stack.append(labels)

    instance_logits_stack = np.array(instance_logits_stack)
    labels_stack = np.array(labels_stack)

    iou = [0.1, 0.3, 0.5, 0.7]
    dmap_detect = ANETdetection(dataset.path_to_annotations, iou, args=args)
    dmap_detect._import_prediction(element_logits_stack)
    dmap = dmap_detect.evaluate()

    if args.dataset_name == "Thumos14":
        # Zero out labels of background-only videos before computing
        # classification mAP.
        test_set = sio.loadmat("test_set_meta.mat")["test_videos"][0]
        for i in range(np.shape(labels_stack)[0]):
            if test_set[i]["background_video"] == "YES":
                labels_stack[i, :] = np.zeros_like(labels_stack[i, :])

    cmap = cmAP(instance_logits_stack, labels_stack)
    print("Classification map %f" % cmap)
    for k in range(len(iou)):
        print("Detection map @ %f = %f" % (iou[k], dmap[k] * 100))
    return dmap[-1] + dmap[-2]

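# A minimal sketch of the `cmAP` helper used above, assumed to compute
# classification mAP from video-level scores and multi-hot labels. This is
# an illustration, not the repository's actual implementation.
from sklearn.metrics import average_precision_score

def cmAP(scores, labels):
    """Mean per-class average precision over classes that have positives.

    scores, labels: arrays of shape (num_videos, num_classes).
    """
    aps = [average_precision_score(labels[:, c], scores[:, c])
           for c in range(labels.shape[1]) if labels[:, c].sum() > 0]
    return float(np.mean(aps))
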
def eval_mAP(ground_truth_filename, prediction_filename,
             subset='test', tiou_thresholds=np.linspace(0.1, 0.9, 9),
             verbose=True, check_status=True):
    anet_detection = ANETdetection(
        ground_truth_filename, prediction_filename,
        subset=subset, tiou_thresholds=tiou_thresholds, verbose=verbose,
        check_status=(config.DATASET_NAME == 'ActivityNet12'))
    return anet_detection.evaluate()

def test_net(tdcnn_demo, dataloader, args, split, max_per_video=0, thresh=0):
    np.random.seed(cfg.RNG_SEED)
    total_video_num = len(dataloader) * args.batch_size
    # all_twins[class][video] -> detections (start, end, score) per video
    all_twins = [[[] for _ in range(total_video_num)]
                 for _ in range(args.num_classes)]
    tdcnn_demo.eval()
    empty_array = np.transpose(np.array([[], [], []]), (1, 0))

    for data_idx, (support_data, video_data, gt_twins, num_gt,
                   video_info) in tqdm(enumerate(dataloader),
                                       desc="evaluation"):
        if is_debug and data_idx > fast_eval_samples:
            break
        video_data = video_data.cuda()
        for i in range(args.shot):
            support_data[i] = support_data[i].cuda()
        gt_twins = gt_twins.cuda()
        batch_size = video_data.shape[0]
        # rois: (1, 300, 3), cls_prob: (1, 300, 2), twin_pred: (1, 300, 4)
        rois, cls_prob, twin_pred = tdcnn_demo(video_data, gt_twins,
                                               support_data)

        scores_all = cls_prob.data
        twins = rois.data[:, :, 1:3]

        if cfg.TEST.TWIN_REG:  # True
            # Apply bounding-twin regression deltas
            twin_deltas = twin_pred.data
            if cfg.TRAIN.TWIN_NORMALIZE_TARGETS_PRECOMPUTED:  # True
                # Optionally normalize targets by a precomputed mean and stdev
                twin_deltas = twin_deltas.view(-1, 2) * torch.FloatTensor(
                    cfg.TRAIN.TWIN_NORMALIZE_STDS).type_as(twin_deltas) \
                    + torch.FloatTensor(
                        cfg.TRAIN.TWIN_NORMALIZE_MEANS).type_as(twin_deltas)
                twin_deltas = twin_deltas.view(batch_size, -1,
                                               2 * args.num_classes)
            pred_twins_all = twin_transform_inv(twins, twin_deltas, batch_size)
            pred_twins_all = clip_twins(pred_twins_all, cfg.TRAIN.LENGTH[0],
                                        batch_size)
        else:
            # Simply repeat the twins, once for each class
            pred_twins_all = np.tile(twins, (1, scores_all.shape[1]))

        for b in range(batch_size):
            if is_debug:
                logger.info(video_info)
            scores = scores_all[b]
            pred_twins = pred_twins_all[b]
            # Skip j = 0, because it's the background class
            for j in range(1, args.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_twins = pred_twins[inds][:, j * 2:(j + 1) * 2]
                    cls_dets = torch.cat((cls_twins,
                                          cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    keep = nms_cpu(cls_dets.cpu(), args.test_nms)
                    if len(keep) > 0:
                        if is_debug:
                            print("after nms, keep {}".format(len(keep)))
                        cls_dets = cls_dets[keep.view(-1).long()]
                    else:
                        print("warning, after nms, none of the rois is kept!!!")
                    all_twins[j][data_idx * batch_size + b] = \
                        cls_dets.cpu().numpy()
                else:
                    all_twins[j][data_idx * batch_size + b] = empty_array

            # Limit to max_per_video detections *over all classes*
            # (dead code by default, since max_per_video = 0).
            if max_per_video > 0:
                video_scores = np.hstack([
                    all_twins[j][data_idx * batch_size + b][:, -1]
                    for j in range(1, args.num_classes)])
                if len(video_scores) > max_per_video:
                    video_thresh = np.sort(video_scores)[-max_per_video]
                    for j in range(1, args.num_classes):
                        keep = np.where(
                            all_twins[j][data_idx * batch_size + b][:, -1]
                            >= video_thresh)[0]
                        all_twins[j][data_idx * batch_size + b] = \
                            all_twins[j][data_idx * batch_size + b][keep, :]

    pred = dict()
    pred['external_data'] = ''
    pred['version'] = ''
    pred['results'] = dict()
    for i_video in tqdm(range(total_video_num),
                        desc="generating prediction json.."):
        if is_debug and i_video > fast_eval_samples * batch_size - 2:
            break
        item_pre = []
        # Binary-class problem: only class 1 is exported; class 0 is background.
        for j_roi in range(len(all_twins[1][i_video])):
            _d = dict()
            _d['score'] = all_twins[1][i_video][j_roi][2].item()
            _d['label'] = 'c1'
            _d['segment'] = [all_twins[1][i_video][j_roi][0].item(),
                             all_twins[1][i_video][j_roi][1].item()]
            item_pre.append(_d)
        pred['results']["query_%05d" % i_video] = item_pre

    predict_filename = os.path.join(logger.get_logger_dir(),
                                    '{}_pred.json'.format(split))
    ground_truth_filename = os.path.join('preprocess/{}'.format(args.dataset),
                                         '{}_gt.json'.format(split))
    with open(predict_filename, 'w') as f:
        json.dump(pred, f)
    logger.info('dump pred.json complete..')

    sys.path.insert(0, "evaluation")
    from eval_detection import ANETdetection
    anet_detection = ANETdetection(ground_truth_filename, predict_filename,
                                   subset="test",
                                   tiou_thresholds=tiou_thresholds,
                                   verbose=True, check_status=False)
    anet_detection.evaluate()
    ap = anet_detection.mAP
    mAP = ap[0]
    return mAP, ap

    remove_background=True)  # test on official evaluation code
id2name = {}
for name, ids in names.items():
    id2name[ids['class']] = name
output = gen_json(classified_proposals, id2name)
with open('prediction.json', 'w') as f:
    json.dump(output, f)

eval1 = ANETdetection('activity_net.v1-3.min.json', 'prediction.json',
                      subset='validation', tiou_thr=0.5,
                      verbose=True, check_status=False)
eval1.evaluate()
eval2 = ANETdetection('activity_net.v1-3.min.json', 'prediction.json',
                      subset='validation', tiou_thr=0.75,
                      verbose=True, check_status=False)
eval2.evaluate()
eval3 = ANETdetection('activity_net.v1-3.min.json', 'prediction.json',
                      subset='validation', tiou_thr=0.95,
                      verbose=True, check_status=False)
eval3.evaluate()

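# The three single-threshold runs above can be collapsed into one pass using
# the array-valued `tiou_thresholds` argument that the other snippets in this
# section use -- a sketch, assuming the same evaluator version and files:
evaluator = ANETdetection('activity_net.v1-3.min.json', 'prediction.json',
                          subset='validation',
                          tiou_thresholds=np.array([0.5, 0.75, 0.95]),
                          verbose=True, check_status=False)
evaluator.evaluate()
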
def detect(prediction_filename,
           ground_truth_filename='activity_net.v1-3.min.json',
           output_log=''):
    anet_detection = ANETdetection(ground_truth_filename, prediction_filename,
                                   subset='validation',
                                   tiou_thresholds=np.linspace(0.5, 0.95, 10),
                                   verbose=True, check_status=True)
    anet_detection.evaluate(output_log)
    return anet_detection.mAP

def evaluate(itr, dataset, model, logger, groundtruth_filename,
             prediction_filename, device=torch.device('cuda'),
             background=True, subset='test', fps=25, stride=16,
             threshold_type='mean', frame_type='max', adjust_mean=1.0,
             act_weight=1.0, tiou_thresholds=np.linspace(0.1, 0.7, 7),
             use_anchor=False):
    '''Generate proposals and evaluate.'''
    with open(groundtruth_filename, 'r') as fr:
        gt_info = json.load(fr)['database']
    save_dict = {'version': dataset.dataset_name, 'external_data': 'None'}
    frame_dict = {'version': dataset.dataset_name, 'external_data': 'abc'}
    rs = defaultdict(list)
    rs2 = defaultdict(list)
    instance_logits_stack = []
    labels_stack = []
    inds = dataset.get_testidx()
    classlist = dataset.get_classlist()
    tps = [0] * len(classlist)
    aps = [0] * len(classlist)
    res = [0] * len(classlist)
    one_hots = np.eye(len(classlist))

    for idx in inds:
        feat = dataset.get_feature(idx)
        vname = dataset.get_vname(idx)
        duration = dataset.get_duration(idx)
        feat = torch.from_numpy(np.expand_dims(feat, axis=0)).float().to(device)
        frame_label = dataset.get_gt_frame_label(idx)
        video_label = dataset.get_video_label(idx)
        if len(video_label) < 1:
            continue
        with torch.no_grad():
            _, logits_f, _, logits_r, tcam, att_logits_f, att_logits_r, \
                att_logits = model(Variable(feat), device, is_training=False)
        logits_f, logits_r, tcam = logits_f[0], logits_r[0], tcam[0]
        # Video-level scores: softmax over the mean of the top-k snippet
        # logits, summed over the forward, reverse, and fused (TCAM) streams.
        topk = int(np.ceil(len(feat[0]) / 8))
        tmp = F.softmax(torch.mean(torch.topk(logits_f, k=topk, dim=0)[0],
                                   dim=0), dim=0).cpu().data.numpy()
        tmp += F.softmax(torch.mean(torch.topk(logits_r, k=topk, dim=0)[0],
                                    dim=0), dim=0).cpu().data.numpy()
        tmp += F.softmax(torch.mean(torch.topk(tcam, k=topk, dim=0)[0],
                                    dim=0), dim=0).cpu().data.numpy()
        if background:
            # Drop the background class (index 0).
            tcam = tcam[:, 1:]
            tmp = tmp[1:]
        instance_logits_stack.append(tmp)
        labels_stack.append(np.sum(one_hots[video_label], axis=0))

        tcam = tcam.cpu().data.numpy()
        pred_label = np.argmax(tcam, axis=-1)
        assert len(pred_label) == len(frame_label)
        for gt in frame_label:
            for g in gt:
                res[g - 1] += 1
        score = np.zeros((len(tcam), 1))
        if use_anchor:
            att = att_logits[0].cpu().data.numpy()
            score = att.squeeze()
        segments, frames = generate_segment(tcam, score,
                                            threshold_type=threshold_type,
                                            frame_type=frame_type,
                                            act_weight=act_weight,
                                            adjust_mean=adjust_mean)
        fps = gt_info[vname].get('fps', fps)
        for frame in frames:
            aps[frame[0]] += 1
            if frame[0] in frame_label[frame[1]]:
                tps[frame[0]] += 1
            rs2[vname] += [{
                'score': float(frame[2] / 100.0),
                'label': classlist[frame[0]].decode('utf-8'),
                'frame': float(frame[1] * stride / fps)
            }]
        for seg in segments:
            rs[vname] += [{
                'score': float(seg[3] / 100.0),
                'label': str(classlist[seg[0]].decode('utf-8')),
                'segment': [float(seg[1] * stride / fps),
                            float(seg[2] * stride / fps)]
            }]

    save_dict['results'] = rs
    frame_dict['results'] = rs2
    with open(prediction_filename, 'w') as fw:
        json.dump(save_dict, fw)

    frame_detection = FrameDetection(groundtruth_filename, frame_dict,
                                     subset=subset, verbose=True,
                                     check_status=False)
    frame_detection.evaluate()
    anet_detection = ANETdetection(groundtruth_filename, save_dict,
                                   subset=subset,
                                   tiou_thresholds=tiou_thresholds,
                                   verbose=True, check_status=False)
    dmap = anet_detection.evaluate()
    for i in range(len(dmap)):
        logger.log_value('mAP/IoU@%s' % (str(tiou_thresholds[i])), dmap[i], itr)

    labels_stack = np.array(labels_stack)
    instance_logits_stack = np.array(instance_logits_stack)
    cmap = cmAP(instance_logits_stack, labels_stack)
    print(cmap)
    tp = np.sum(tps)
    ap = np.sum(aps)
    recall = np.sum(res)
    print('All act frames %d, predicted frames: %d, right frames: %d, '
          'AP: %0.5f, Recall: %0.5f' % (recall, ap, tp, tp / ap, tp / recall))
    acc = dmap[-3]
    return np.mean(acc)

# Load the predicted data.
pred_data = np.load(results_npy, allow_pickle=True).item()
pred_labels = pred_data['pred_labels']
logits = pred_data['logits']
alpha = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.65,
                  0.7, 0.75, 0.8, 0.85, 0.9, 0.95])

# Compute action localization mAP.
anet_detection = ANETdetection(gt_json_path, results_json,
                               subset='testing', tiou_thresholds=alpha,
                               verbose=True, check_status=True)
anet_detection.evaluate()
localization_score_str = (
    "Action localization scores: \n"
    "Average mAP = {} \n".format(anet_detection.average_mAP) +
    "alpha = " + " & ".join(str(alpha).split()) + "\n "
    "mAP scores = " +
    " & ".join(str(np.around(anet_detection.mAP, 2)).split()) + "\n ")

# Compute classification mAP.
anet_classification = ANETclassification(gt_json_path, results_json,
                                         subset='testing', verbose=True)
anet_classification.evaluate()
classification_score_str = (
    "Action classification scores: \n"
    "mAP = {} \n".format(anet_classification.ap.mean()))