import copy
import os
import pickle
import sys
from copy import deepcopy

import numpy as np

# Project-specific helpers such as get_dataset, load_frame_detections,
# load_frame_detections_stream, iou2d, pr_to_ap, nms_tubelets and
# VisualizationDataset are assumed to be imported from the surrounding repository.


def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    vlist = dataset._test_videos[opt.split - 1]

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print("OverflowError: cannot serialize a bytes object larger than 4 GiB")

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):
                    k = (iv, int(tube[i, 0]))
                    if k not in gt:
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # pr will be an array containing precision-recall values
        pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([g.shape[0] for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        # process detections from highest to lowest score
        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        results[label] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])
    frameap_result = np.mean(ap)

    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
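# Added for reference: the evaluation code above and below matches boxes with `iou2d`,
# which is defined elsewhere in the repository. The sketch below only illustrates the
# behaviour assumed here (IoU of each box in an (N, 4) array against a single box,
# both as (x1, y1, x2, y2)); it is named differently so it does not shadow the
# project's implementation, whose exact coordinate convention may differ.
def _iou2d_reference(boxes, box):
    """IoU of each row of `boxes` (N, 4) with a single `box` (4,)."""
    boxes = np.atleast_2d(boxes)
    ix1 = np.maximum(boxes[:, 0], box[0])
    iy1 = np.maximum(boxes[:, 1], box[1])
    ix2 = np.minimum(boxes[:, 2], box[2])
    iy2 = np.minimum(boxes[:, 3], box[3])
    inter = np.maximum(ix2 - ix1, 0.0) * np.maximum(iy2 - iy1, 0.0)
    area_boxes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    return inter / (area_boxes + area_box - inter)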
def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    # ORIG
    vlist = dataset._test_videos[opt.split - 1]
    # vlist = dataset._train_videos[opt.split - 1]
    '''
    # ADDED: to analyze a specific class
    tar_class = 'wave'
    vlist_filt = []
    for vv in range(len(vlist)):
        cls_name, clip_name = vlist[vv].split('/')
        if cls_name == tar_class:
            vlist_filt.append(vlist[vv])
    vlist = vlist_filt
    '''

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        if opt.inference_mode == 'stream':
            alldets = load_frame_detections_stream(opt, dataset, opt.K, vlist, inference_dirname)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print("OverflowError: cannot serialize a bytes object larger than 4 GiB")

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):  # e.g. 0, 'brush_hair'
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):  # for each frame
                    k = (iv, int(tube[i, 0]))  # (video id, frame id)
                    if k not in gt:  # if not yet added to gt
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # ADDED: keep a record of the original gt of this class
        # (it is not deleted or modified during evaluation)
        if opt.evaluation_mode == 'trimmed':
            gt_past = copy.deepcopy(gt)
            gt_keys_list = list(gt.keys())
            gt_vid = []
            for vv in gt_keys_list:
                if vv[0] in gt_vid:
                    continue
                gt_vid.append(vv[0])

        # pr will be an array containing precision-recall values
        # pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr = -1 * np.ones((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        # false negatives
        # ALPHA: equals the number of annotated frames (each frame has exactly one action instance)
        fn = sum([g.shape[0] for g in gt.values()])
        fp = 0  # false positives
        tp = 0  # true positives

        '''
        # Below may not be needed now if detection is conducted on all frames
        # ADDED: remove potential fn (when not evaluating all frames)?
        # Confirmed: can still be used when evaluating the whole set (at least for JHMDB)
        if opt.dataset == 'hmdb':
            num_tp = 0
            prev_k = (-1, -1)
            for ii, jj in enumerate(detections):
                k = (int(detections[ii, 0]), int(detections[ii, 1]))  # (video id, frame id)
                if k in gt and k != prev_k:
                    num_tp += 1
                    prev_k = k
            fn = num_tp
        '''

        # j: index of the detection, from highest to lowest score
        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))  # (video id, frame id)
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            # untrimmed evaluation (for ucf24): skip a detection whose frame is not in the
            # unmodified gt list although its video id appears among this class's gt videos
            if opt.evaluation_mode == 'trimmed':
                if k[0] in gt_vid and not (k in gt_past):
                    continue

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            # ADDED: avoid a division-by-zero error; is it needed?
            if tp + fp == 0 or tp + fn == 0:
                continue
            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        # drop rows that were never written (still -1) before computing AP
        pr_trimmed = pr[pr[:, 0] != -1]
        results[label] = pr_trimmed

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])

    # ADDED: display per-class performance
    frameap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        frameap_percls[cls_name] = ap[cl]
    for key, value in frameap_percls.items():
        print(key, ':', value)

    frameap_result = np.mean(ap)

    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
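# Added for reference: both frameAP variants reduce the precision-recall array with
# `pr_to_ap`, defined elsewhere in the repository. The sketch below shows the assumed
# computation (area under the PR curve by trapezoidal integration over recall); it is
# named differently so it does not replace the project's implementation.
def _pr_to_ap_reference(pr):
    """AP from a PR array with precision in column 0 and recall in column 1,
    rows ordered by increasing recall."""
    prdif = pr[1:, 1] - pr[:-1, 1]      # recall increments
    prsum = pr[1:, 0] + pr[:-1, 0]      # precision at consecutive operating points
    return np.sum(prdif * prsum * 0.5)  # trapezoidal area under the curve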
def frameAP_error(opt, redo=False):
    th = opt.th
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    eval_file = os.path.join(inference_dirname, "frameAP{:g}ErrorAnalysis.pkl".format(th))

    if os.path.isfile(eval_file) and not redo:
        with open(eval_file, 'rb') as fid:
            res = pickle.load(fid)
    else:
        vlist = dataset._test_videos[opt.split - 1]

        # load per-frame detections
        frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
        if os.path.isfile(frame_detections_file) and not redo:
            print('load frameAP pre-result')
            with open(frame_detections_file, 'rb') as fid:
                alldets = pickle.load(fid)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid)

        res = {}
        # alldets: numpy array with <video_index> <frame_index> <ilabel> <score> <x1> <y1> <x2> <y2>
        # compute AP for each class
        print(len(dataset.labels))
        for ilabel, label in enumerate(dataset.labels):
            # detections of this class
            detections = alldets[alldets[:, 2] == ilabel, :]

            gt = {}
            othergt = {}
            labellist = {}

            # iv, v: e.g. 0, 'Basketball/v_Basketball_g01_c01'
            for iv, v in enumerate(vlist):
                # tubes: dict {ilabel: (list of) <frame number> <x1> <y1> <x2> <y2>}
                tubes = dataset._gttubes[v]
                # labellist[iv]: label list for v
                labellist[iv] = tubes.keys()

                for il in tubes:
                    # tube: list of <frame number> <x1> <y1> <x2> <y2>
                    for tube in tubes[il]:
                        for i in range(tube.shape[0]):
                            # k: (video_index, frame_index)
                            k = (iv, int(tube[i, 0]))
                            if il == ilabel:
                                if k not in gt:
                                    gt[k] = []
                                gt[k].append(tube[i, 1:5].tolist())
                            else:
                                if k not in othergt:
                                    othergt[k] = []
                                othergt[k].append(tube[i, 1:5].tolist())

            for k in gt:
                gt[k] = np.array(gt[k])
            for k in othergt:
                othergt[k] = np.array(othergt[k])

            dupgt = deepcopy(gt)

            # pr will be an array containing precision-recall values and 4 types of errors:
            # localization, classification, timing, others
            pr = np.empty((detections.shape[0] + 1, 6), dtype=np.float32)  # precision, recall
            pr[0, 0] = 1.0
            pr[0, 1:] = 0.0

            fn = sum([g.shape[0] for g in gt.values()])  # false negatives
            fp = 0  # false positives
            tp = 0  # true positives
            EL = 0  # localization errors
            EC = 0  # classification errors: overlap >= 0.5 with an instance of another class
            EO = 0  # other errors
            ET = 0  # timing errors: the video contains the action but not at this frame

            for i, j in enumerate(np.argsort(-detections[:, 3])):
                k = (int(detections[j, 0]), int(detections[j, 1]))
                box = detections[j, 4:8]
                ispositive = False

                if k in dupgt:
                    if k in gt:
                        ious = iou2d(gt[k], box)
                        amax = np.argmax(ious)
                    if k in gt and ious[amax] >= th:
                        ispositive = True
                        gt[k] = np.delete(gt[k], amax, 0)
                        if gt[k].size == 0:
                            del gt[k]
                    else:
                        EL += 1
                elif k in othergt:
                    ious = iou2d(othergt[k], box)
                    if np.max(ious) >= th:
                        EC += 1
                    else:
                        EO += 1
                elif ilabel in labellist[k[0]]:
                    ET += 1
                else:
                    EO += 1

                if ispositive:
                    tp += 1
                    fn -= 1
                else:
                    fp += 1

                pr[i + 1, 0] = float(tp) / float(tp + fp)  # precision
                pr[i + 1, 1] = float(tp) / float(tp + fn)  # recall
                pr[i + 1, 2] = float(EL) / float(tp + fp)
                pr[i + 1, 3] = float(EC) / float(tp + fp)
                pr[i + 1, 4] = float(ET) / float(tp + fp)
                pr[i + 1, 5] = float(EO) / float(tp + fp)

            res[label] = pr

        # save results
        with open(eval_file, 'wb') as fid:
            pickle.dump(res, fid)

    # display results
    AP = 100 * np.array([pr_to_ap(res[label][:, [0, 1]]) for label in dataset.labels])
    othersap = [
        100 * np.array([pr_to_ap(res[label][:, [j, 1]]) for label in dataset.labels])
        for j in range(2, 6)
    ]

    EL = othersap[0]
    EC = othersap[1]
    ET = othersap[2]
    EO = othersap[3]
    # missed detections = 1 - recall
    EM = 100 - 100 * np.array([res[label][-1, 1] for label in dataset.labels])

    LIST = [AP, EL, EC, ET, EO, EM]

    print('Error Analysis')
    print("")
    print("{:20s} {:8s} {:8s} {:8s} {:8s} {:8s} {:8s}".format(
        'label', '   AP   ', '  Loc.  ', '  Cls.  ', '  Time  ', ' Other  ', ' missed '))
    print("")
    for il, label in enumerate(dataset.labels):
        print("{:20s} ".format(label) + " ".join(["{:8.2f}".format(L[il]) for L in LIST]))
    print("")
    print("{:20s} ".format("mean") + " ".join(["{:8.2f}".format(np.mean(L)) for L in LIST]))
    print("")
def build_tubes(opt):
    print('inference finish, start building tubes!', flush=True)
    K = opt.K
    dataset = VisualizationDataset(opt)
    outfile = os.path.join(opt.inference_dir, "tubes.pkl")

    RES = {}
    # load detected tubelets
    VDets = {}
    for startframe in range(1, dataset._nframes + 2 - K):
        resname = os.path.join(opt.inference_dir, "{:0>5}.pkl".format(startframe))

        if not os.path.isfile(resname):
            print("ERROR: Missing extracted tubelets " + resname, flush=True)
            sys.exit()

        with open(resname, 'rb') as fid:
            VDets[startframe] = pickle.load(fid)

    for ilabel in range(opt.num_classes):
        FINISHED_TUBES = []
        CURRENT_TUBES = []  # a tube is a list of tuples (frame, tubelet)

        # calculate the average score of the tubelets in a tube
        def tubescore(tt):
            return np.mean(np.array([tt[i][1][-1] for i in range(len(tt))]))

        for frame in range(1, dataset._nframes + 2 - K):
            # load the tubelets starting at this frame and do NMS, keeping the highest-scored ones
            # ltubelets: Nx(4K+1) with (x1 y1 x2 y2)*K followed by the ilabel score
            # (originally sliced as [:, range(4*K) + [4*K + 1 + ilabel]])
            ltubelets = VDets[frame][ilabel + 1]

            ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

            # on the first frame, just start new tubes
            if frame == 1:
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(1, ltubelets[i, :])])
                continue

            # sort current tubes according to average score
            avgscore = [tubescore(t) for t in CURRENT_TUBES]
            argsort = np.argsort(-np.array(avgscore))
            CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]

            # loop over tubes
            finished = []
            for it, t in enumerate(CURRENT_TUBES):
                # compute ious between the last tubelet of t and ltubelets
                last_frame, last_tubelet = t[-1]
                ious = []
                offset = frame - last_frame
                if offset < K:
                    # average IoU over the frames where the two tubelets overlap
                    nov = K - offset
                    ious = sum([iou2d(ltubelets[:, 4 * iov:4 * iov + 4],
                                      last_tubelet[4 * (iov + offset):4 * (iov + offset + 1)])
                                for iov in range(nov)]) / float(nov)
                else:
                    ious = iou2d(ltubelets[:, :4], last_tubelet[4 * K - 4:4 * K])

                valid = np.where(ious >= 0.5)[0]

                if valid.size > 0:
                    # take the one with maximum score
                    idx = valid[np.argmax(ltubelets[valid, -1])]
                    CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                    ltubelets = np.delete(ltubelets, idx, axis=0)
                else:
                    if offset >= opt.K:
                        finished.append(it)

            # move tubes that are done to FINISHED_TUBES
            for it in finished[::-1]:  # process in reverse order so deletion keeps the remaining indices valid
                FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                del CURRENT_TUBES[it]

            # start new tubes from the remaining tubelets
            for i in range(ltubelets.shape[0]):
                CURRENT_TUBES.append([(frame, ltubelets[i, :])])

        # add the tubes that are still not finished
        FINISHED_TUBES += CURRENT_TUBES

        # build real tubes
        output = []
        for t in FINISHED_TUBES:
            score = tubescore(t)

            # skip tubes with a low average score
            if score < 0.005:
                continue

            beginframe = t[0][0]
            endframe = t[-1][0] + K - 1
            length = endframe + 1 - beginframe

            # delete tubes with short duration
            if length < 15:
                continue

            # build final tubes by averaging the tubelets
            out = np.zeros((length, 6), dtype=np.float32)
            out[:, 0] = np.arange(beginframe, endframe + 1)
            n_per_frame = np.zeros((length, 1), dtype=np.int32)
            for i in range(len(t)):
                frame, box = t[i]
                for k in range(K):
                    out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4]
                    out[frame - beginframe + k, -1] += box[-1]  # single-frame confidence
                    n_per_frame[frame - beginframe + k, 0] += 1
            out[:, 1:] /= n_per_frame
            # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]
            output.append([out, score])

        RES[ilabel] = output  # RES: {ilabel: [[out (length x 6), score], ...]}

    # remove the per-frame tubelet files now that the tubes are built
    os.system("rm -rf " + opt.inference_dir + "/*.pkl")

    with open(outfile, 'wb') as fid:
        pickle.dump(RES, fid)
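# Added for reference: build_tubes suppresses overlapping tubelets with `nms_tubelets`,
# defined elsewhere in the repository. The sketch below illustrates the assumed
# behaviour (greedy NMS where the overlap of two tubelets is the mean IoU of their K
# per-frame boxes, keeping at most `top_k` candidates); it reuses the hypothetical
# `_iou2d_reference` sketch above and is named differently so it does not replace the
# project's implementation.
def _nms_tubelets_reference(dets, overlap_thresh=0.6, top_k=10):
    """Greedy NMS over tubelets; `dets` is (N, 4*K + 1): K boxes followed by a score."""
    if dets.shape[0] == 0:
        return dets
    K = (dets.shape[1] - 1) // 4
    order = np.argsort(-dets[:, -1])  # highest score first
    keep = []
    while order.size > 0 and len(keep) < top_k:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        if rest.size == 0:
            break
        # mean IoU over the K frames between tubelet i and the remaining candidates
        ious = np.zeros(rest.size, dtype=np.float32)
        for k in range(K):
            ious += _iou2d_reference(dets[rest, 4 * k:4 * k + 4], dets[i, 4 * k:4 * k + 4])
        ious /= K
        order = rest[ious <= overlap_thresh]
    return dets[keep, :]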