def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    vlist = dataset._test_videos[opt.split - 1]

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print("OverflowError: cannot serialize a bytes object larger than 4 GiB")

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]
            if ilabel not in tubes:
                continue
            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):
                    k = (iv, int(tube[i, 0]))
                    if k not in gt:
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # pr will be an array containing precision-recall values
        pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([g.shape[0] for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)
                    if gt[k].size == 0:
                        del gt[k]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1
            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        results[label] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])
    frameap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))
    return frameap_result
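
# Illustrative sketch (assumption, not the repository's pr_to_ap): frameAP above feeds
# pr_to_ap an (N+1, 2) array of [precision, recall] rows ordered by descending detection
# score; a common way to turn that into average precision is trapezoidal integration of
# precision over recall, as sketched below.
def _pr_to_ap_sketch(pr):
    prdif = pr[1:, 1] - pr[:-1, 1]      # recall increments between consecutive rows
    prsum = pr[1:, 0] + pr[:-1, 0]      # precision at the two rows bounding each increment
    return np.sum(prdif * prsum * 0.5)  # trapezoidal area under the precision-recall curve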
def videoAP(opt, print_info=True):
    th = opt.th
    model_name = opt.model_name
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    vlist = dataset._test_videos[opt.split - 1]

    # load detections
    # alldets = for each label in 1..nlabels, list of tuple (v, score, tube as Kx5 array)
    alldets = {ilabel: [] for ilabel in range(len(dataset.labels))}
    for v in vlist:
        tubename = os.path.join(inference_dirname, v + '_tubes.pkl')
        if not os.path.isfile(tubename):
            print("ERROR: Missing extracted tubes " + tubename)
            sys.exit()
        with open(tubename, 'rb') as fid:
            tubes = pickle.load(fid)
        for ilabel in range(len(dataset.labels)):
            ltubes = tubes[ilabel]
            idx = nms3dt(ltubes, 0.3)
            alldets[ilabel] += [(v, ltubes[i][1], ltubes[i][0]) for i in idx]

    # compute AP for each class
    res = {}
    for ilabel in range(len(dataset.labels)):
        detections = alldets[ilabel]

        # load ground-truth
        gt = {}
        for v in vlist:
            tubes = dataset._gttubes[v]
            if ilabel not in tubes:
                continue
            gt[v] = tubes[ilabel]
            if len(gt[v]) == 0:
                del gt[v]

        # precision, recall
        pr = np.empty((len(detections) + 1, 2), dtype=np.float32)
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([len(g) for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives
        for i, j in enumerate(np.argsort(-np.array([dd[1] for dd in detections]))):
            v, score, tube = detections[j]
            ispositive = False
            if v in gt:
                ious = [iou3dt(g, tube) for g in gt[v]]
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    del gt[v][amax]
                    if len(gt[v]) == 0:
                        del gt[v]
            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1
            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        res[dataset.labels[ilabel]] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(res[label]) for label in dataset.labels])
    videoap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} VideoAP_{}\n'.format(model_name, th))
        print('Task_{} VideoAP_{}\n'.format(opt.model_name, th))
        # for il, _ in enumerate(dataset.labels):
        #     print("{:20s} {:8.2f}".format('', ap[il]))
        #     log_file.write("{:20s} {:8.2f}\n".format('', ap[il]))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", videoap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", videoap_result))
    return videoap_result
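
# Illustrative sketch (assumption, not the repository's iou2d/iou3dt): videoAP above matches
# a detected tube against ground-truth tubes with iou3dt. A common definition is the temporal
# IoU of the two frame ranges multiplied by the mean per-frame spatial IoU over the overlapping
# frames; tubes are (n, 5) arrays of <frame> <x1> <y1> <x2> <y2> with one row per frame.
def _box_iou_sketch(box_a, box_b):
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def _iou3dt_sketch(tube_a, tube_b):
    tmin = max(tube_a[0, 0], tube_b[0, 0])
    tmax = min(tube_a[-1, 0], tube_b[-1, 0])
    if tmax < tmin:
        return 0.0
    temporal_inter = tmax - tmin + 1
    temporal_union = max(tube_a[-1, 0], tube_b[-1, 0]) - min(tube_a[0, 0], tube_b[0, 0]) + 1
    spatial = [_box_iou_sketch(tube_a[tube_a[:, 0] == f, 1:5][0],
                               tube_b[tube_b[:, 0] == f, 1:5][0])
               for f in np.arange(tmin, tmax + 1)]
    return float(np.mean(spatial)) * temporal_inter / temporal_union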
def main(opt):
    set_seed(opt.seed)
    torch.backends.cudnn.benchmark = True
    print()
    print('dataset: ' + opt.dataset + ' task: ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    model = create_model(opt.arch, opt.branch_info, opt.head_conv, opt.K)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = opt.start_epoch

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    else:
        model = load_imagenet_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer, opt.lr, opt.ucf_pretrain)

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocation:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    stop_step = 0
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('epoch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
            save_model(os.path.join(opt.save_dir, model_name), model, optimizer, epoch, log_dict_train['loss'])
        else:
            model_name = 'model_last.pth'
            save_model(os.path.join(opt.save_dir, model_name), model, optimizer, epoch, log_dict_train['loss'])

        # this step evaluates the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
            logger.write('\n')

        if opt.auto_stop:
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            stream_inference(opt)
            ap = frameAP(opt, print_info=opt.print_log)
            os.system("rm -rf tmp")
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                os.system("cp " + str(saved1) + " " + str(saved2))
            if stop_step < len(opt.lr_step) and epoch >= opt.lr_step[stop_step]:
                model, optimizer, _, _ = load_model(model, os.path.join(opt.save_dir, 'model_best.pth'),
                                                    optimizer, opt.lr)
                opt.lr = opt.lr * 0.1
                logger.write('Drop LR to ' + str(opt.lr) + '\n')
                print('Drop LR to ' + str(opt.lr))
                print('load epoch is ', best_epoch)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = opt.lr
                torch.cuda.empty_cache()
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1
            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model
        else:
            # this step drops lr
            if epoch in opt.lr_step:
                lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

    if opt.auto_stop:
        print('best epoch is ', best_epoch)
    logger.close()
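
# Minimal usage sketch (assumption): how this training entry point is typically invoked;
# opts().parse() is assumed to exist in the repo's opts module and to return the option
# namespace consumed by main().
if __name__ == '__main__':
    opt = opts().parse()
    main(opt)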
def frameAP_error(opt, redo=False):
    th = opt.th
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    eval_file = os.path.join(inference_dirname, "frameAP{:g}ErrorAnalysis.pkl".format(th))

    if os.path.isfile(eval_file) and not redo:
        with open(eval_file, 'rb') as fid:
            res = pickle.load(fid)
    else:
        vlist = dataset._test_videos[opt.split - 1]

        # load per-frame detections
        frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
        if os.path.isfile(frame_detections_file) and not redo:
            print('load frameAP pre-result')
            with open(frame_detections_file, 'rb') as fid:
                alldets = pickle.load(fid)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid)

        res = {}
        # alldets: list of numpy array with <video_index> <frame_index> <ilabel> <score> <x1> <y1> <x2> <y2>
        # compute AP for each class
        print(len(dataset.labels))
        for ilabel, label in enumerate(dataset.labels):
            # detections of this class
            detections = alldets[alldets[:, 2] == ilabel, :]

            gt = {}
            othergt = {}
            labellist = {}

            # iv, v : 0 Basketball/v_Basketball_g01_c01
            for iv, v in enumerate(vlist):
                # tubes: dict {ilabel: (list of) <frame number> <x1> <y1> <x2> <y2>}
                tubes = dataset._gttubes[v]
                # labellist[iv]: label list for v
                labellist[iv] = tubes.keys()

                for il in tubes:
                    # tube: list of <frame number> <x1> <y1> <x2> <y2>
                    for tube in tubes[il]:
                        for i in range(tube.shape[0]):
                            # k: (video_index, frame_index)
                            k = (iv, int(tube[i, 0]))
                            if il == ilabel:
                                if k not in gt:
                                    gt[k] = []
                                gt[k].append(tube[i, 1:5].tolist())
                            else:
                                if k not in othergt:
                                    othergt[k] = []
                                othergt[k].append(tube[i, 1:5].tolist())

            for k in gt:
                gt[k] = np.array(gt[k])
            for k in othergt:
                othergt[k] = np.array(othergt[k])

            dupgt = deepcopy(gt)

            # pr will be an array containing precision-recall values and 4 types of errors:
            # localization, classification, timing, others
            pr = np.empty((detections.shape[0] + 1, 6), dtype=np.float32)  # precision, recall
            pr[0, 0] = 1.0
            pr[0, 1:] = 0.0

            fn = sum([g.shape[0] for g in gt.values()])  # false negatives
            fp = 0  # false positives
            tp = 0  # true positives
            EL = 0  # localization errors
            EC = 0  # classification errors: overlap >= 0.5 with another object
            EO = 0  # other errors
            ET = 0  # timing errors: the video contains the action but not at this frame

            for i, j in enumerate(np.argsort(-detections[:, 3])):
                k = (int(detections[j, 0]), int(detections[j, 1]))
                box = detections[j, 4:8]
                ispositive = False

                if k in dupgt:
                    if k in gt:
                        ious = iou2d(gt[k], box)
                        amax = np.argmax(ious)
                    if k in gt and ious[amax] >= th:
                        ispositive = True
                        gt[k] = np.delete(gt[k], amax, 0)
                        if gt[k].size == 0:
                            del gt[k]
                    else:
                        EL += 1
                elif k in othergt:
                    ious = iou2d(othergt[k], box)
                    if np.max(ious) >= th:
                        EC += 1
                    else:
                        EO += 1
                elif ilabel in labellist[k[0]]:
                    ET += 1
                else:
                    EO += 1

                if ispositive:
                    tp += 1
                    fn -= 1
                else:
                    fp += 1

                pr[i + 1, 0] = float(tp) / float(tp + fp)  # precision
                pr[i + 1, 1] = float(tp) / float(tp + fn)  # recall
                pr[i + 1, 2] = float(EL) / float(tp + fp)
                pr[i + 1, 3] = float(EC) / float(tp + fp)
                pr[i + 1, 4] = float(ET) / float(tp + fp)
                pr[i + 1, 5] = float(EO) / float(tp + fp)

            res[label] = pr

        # save results
        with open(eval_file, 'wb') as fid:
            pickle.dump(res, fid)

    # display results
    AP = 100 * np.array([pr_to_ap(res[label][:, [0, 1]]) for label in dataset.labels])
    othersap = [100 * np.array([pr_to_ap(res[label][:, [j, 1]]) for label in dataset.labels])
                for j in range(2, 6)]

    EL = othersap[0]
    EC = othersap[1]
    ET = othersap[2]
    EO = othersap[3]
    # missed detections = 1 - recall
    EM = 100 - 100 * np.array([res[label][-1, 1] for label in dataset.labels])

    LIST = [AP, EL, EC, ET, EO, EM]

    print('Error Analysis')
    print("")
    print("{:20s} {:8s} {:8s} {:8s} {:8s} {:8s} {:8s}".format(
        'label', '   AP   ', '  Loc.  ', '  Cls.  ', '  Time  ', ' Other ', ' missed '))
    print("")
    for il, label in enumerate(dataset.labels):
        print("{:20s} ".format(label) + " ".join(["{:8.2f}".format(L[il]) for L in LIST]))
    print("")
    print("{:20s} ".format("mean") + " ".join(["{:8.2f}".format(np.mean(L)) for L in LIST]))
    print("")
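
# Minimal sketch (assumption): re-reading the error-analysis pickle written by frameAP_error
# above. The file name follows the "frameAP{:g}ErrorAnalysis.pkl" pattern, and each entry is
# an (N+1, 6) array of [precision, recall, loc. err, cls. err, time err, other err] rows.
def _load_error_analysis_sketch(inference_dirname, th=0.5):
    eval_file = os.path.join(inference_dirname, "frameAP{:g}ErrorAnalysis.pkl".format(th))
    with open(eval_file, 'rb') as fid:
        res = pickle.load(fid)
    for label, pr in res.items():
        print(label, 'final precision/recall:', pr[-1, 0], pr[-1, 1])
    return res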
def main(opt):
    # added to specify gpu id; the gpus arg in the provided code does not work
    torch.cuda.set_device(opt.gpus[0])

    set_seed(opt.seed)
    print('dataset: ' + opt.dataset + ' task: ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    opt.device = torch.device('cuda')

    is_pa = False
    if opt.pa_model != '':
        is_pa = True
    model = create_model(opt.arch, opt.branch_info, opt.head_conv, opt.K,
                         is_pa=is_pa, pa_fuse_mode=opt.pa_fuse_mode, rgb_w3=opt.rgb_w3)

    # TODO: Compute grad magnitude (maybe check youssef's snippet)
    # TODO: Log grad to TB

    # default (single set of hyperparam)
    # Complexity analysis
    '''
    with torch.cuda.device(1):
        macs, params = get_model_complexity_info(model, (15, 288, 288),
                                                 input_constructor=prepare_input,
                                                 as_strings=True,
                                                 print_per_layer_stat=True, verbose=True)
        print('{:<30} {:<8}'.format('Computational complexity: ', macs))
        print('{:<30} {:<8}'.format('Number of parameters: ', params))
    '''

    # orig
    # optimizer = torch.optim.Adam(model.parameters(), opt.lr)

    # custom
    lr_factor = 1.0
    if opt.pa_model != '':
        optimizer = torch.optim.Adam([
            {"params": model.pa.parameters(), "lr": opt.lr * lr_factor},
            {"params": model.backbone.parameters(), "lr": opt.lr},
            {"params": model.deconv_layer.parameters(), "lr": opt.lr},
            {"params": model.branch.parameters(), "lr": opt.lr},
        ], opt.lr)
    else:  # rgb model
        optimizer = torch.optim.Adam([
            {"params": model.backbone.parameters(), "lr": opt.lr},
            {"params": model.deconv_layer.parameters(), "lr": opt.lr},
            {"params": model.branch.parameters(), "lr": opt.lr},
        ], opt.lr)

    start_epoch = opt.start_epoch

    # ADDED: allow automatic lr dropping upon resuming training
    step_count = 0
    for step in range(len(opt.lr_step)):
        if start_epoch >= opt.lr_step[step]:
            step_count += 1
    opt.lr = opt.lr * (opt.lr_drop ** step_count)

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    elif opt.pretrain_model == 'imagenet':
        model = load_imagenet_pretrained_model(opt, model)
    else:
        model = load_custom_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer, opt.lr, opt.ucf_pretrain)

    for i, child in enumerate(model.children()):
        pass
        # if i == 2 or i == 3:  # unfreeze branch, deconv: reproducible! but not pa nor backbone
        #     for l, param in enumerate(child.parameters()):
        #         param.requires_grad = False
    '''
    if i == 0:  # PA
        continue
        # for l, param in enumerate(child.parameters()):
        #     if l < 3:  # 3: conv1  15: block2
        #         param.requires_grad = False
    elif i == 1:  # backbone
        continue
        # for l, param in enumerate(child.parameters()):
        #     print('layer {} shape: {}'.format(l, param.size()))
        #     if l == 2 or l == 3 or l == 4:  # 5: conv1 and conv1_5, 30: resnext_layer1
        #         param.requires_grad = False
    elif i == 2:  # deconv
        for l, param in enumerate(child.parameters()):
            param.requires_grad = False
    '''
    # else:
    #     for name, module in child.named_modules():
    #         if name in list_of_lay_freeze:
    #             for param in module.parameters():
    #                 param.requires_grad = False
    #             if isinstance(module, torch.nn.ReLU):
    #                 break
    #             print(name)

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocation:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    stop_step = 0
    # TODO: this needs to be adjusted otherwise lr is dropped incorrectly when resuming training!
    # (can set to 1 now if resuming from drop-once)

    # added: to ensure no decrease of lr too early (for jh s1?)
    if stop_step == 0:
        drop_early_flag = False  # should be False if wanting more reproducible results (e.g., jh s1)
    else:
        drop_early_flag = True

    set_seed(opt.seed)  # 317
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('epoch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
            save_model(os.path.join(opt.save_dir, model_name), model, optimizer, epoch, log_dict_train['loss'])
        else:
            model_name = 'model_last.pth'
            save_model(os.path.join(opt.save_dir, model_name), model, optimizer, epoch, log_dict_train['loss'])

        # this step evaluates the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
            logger.write('\n')

        if opt.auto_stop:
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            tmp_pa_model = opt.pa_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            if opt.pa_model != '':
                opt.pa_model = os.path.join(opt.pa_model, model_name)
            # orig: difficult to handle with long-range mem
            # stream_inference(opt)
            normal_inference(opt)
            ap = frameAP(opt, print_info=opt.print_log)
            # added for debug
            print('frame mAP: {}'.format(ap))
            os.system("rm -rf tmp")
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                os.system("cp " + str(saved1) + " " + str(saved2))
            if stop_step < len(opt.lr_step) and epoch >= opt.lr_step[stop_step]:
                # added: don't want it to decrease lr too early just because the mAP was higher there ...
                # seemed to create a problem for jh s1
                if drop_early_flag is False:
                    model, optimizer, _, _ = load_model(model, os.path.join(opt.save_dir, 'model_last.pth'),
                                                        optimizer, opt.lr)  # model_best -> model_last?
                    drop_early_flag = True
                    print('load epoch is ', epoch)
                else:
                    # after the first drop, the rest could drop based on mAP
                    model, optimizer, _, _ = load_model(model, os.path.join(opt.save_dir, 'model_best.pth'),
                                                        optimizer, opt.lr)  # model_best -> model_last?
                    print('load epoch is ', best_epoch)

                opt.lr = opt.lr * opt.lr_drop
                logger.write('Drop LR to ' + str(opt.lr) + '\n')
                for ii, param_group in enumerate(optimizer.param_groups):
                    if ii >= 1:  # backbone + deconv + branch
                        param_group['lr'] = opt.lr
                    else:
                        param_group['lr'] = opt.lr * lr_factor
                        print('Drop PA LR to ' + str(opt.lr * lr_factor))
                print('Drop backbone LR to ' + str(opt.lr))
                print('Drop branch LR to ' + str(opt.lr))
                torch.cuda.empty_cache()
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1

            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model
            opt.pa_model = tmp_pa_model
        else:
            # this step drops lr
            if epoch in opt.lr_step:
                lr = opt.lr * (opt.lr_drop ** (opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')
                # added for debug
                print('Drop LR to ' + str(lr) + '\n')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

    if opt.auto_stop:
        print('best epoch is ', best_epoch)
    logger.close()
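
# Minimal sketch (assumption): the effective learning rate produced by the resume logic above
# for a given start_epoch, lr_step schedule, and lr_drop factor; the values in the example
# comment are hypothetical.
def _resumed_lr_sketch(base_lr, start_epoch, lr_step, lr_drop):
    step_count = sum(1 for s in lr_step if start_epoch >= s)
    return base_lr * (lr_drop ** step_count)

# e.g. _resumed_lr_sketch(5e-4, 7, [6, 8], 0.1) -> 5e-5 (one scheduled drop already passed)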
def videoAP(opt, print_info=True):
    th = opt.th
    model_name = opt.model_name
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    vlist = dataset._test_videos[opt.split - 1]

    # load detections
    # alldets = for each label in 1..nlabels, list of tuple (v, score, tube as Kx5 array)
    alldets = {ilabel: [] for ilabel in range(len(dataset.labels))}
    for v in vlist:
        tubename = os.path.join(inference_dirname, v + '_tubes.pkl')
        if not os.path.isfile(tubename):
            print("ERROR: Missing extracted tubes " + tubename)
            sys.exit()
        with open(tubename, 'rb') as fid:
            tubes = pickle.load(fid)
        for ilabel in range(len(dataset.labels)):
            ltubes = tubes[ilabel]
            idx = nms3dt(ltubes, 0.3)
            alldets[ilabel] += [(v, ltubes[i][1], ltubes[i][0]) for i in idx]

    # compute AP for each class
    # target_class = [5]
    res = {}
    for ilabel in range(len(dataset.labels)):
        # if ilabel not in target_class:
        #     continue
        detections = alldets[ilabel]

        # load ground-truth
        gt = {}
        for v in vlist:
            tubes = dataset._gttubes[v]
            if ilabel not in tubes:
                continue
            gt[v] = tubes[ilabel]
            if len(gt[v]) == 0:
                del gt[v]
        num_gt = len(gt)

        # precision, recall
        pr = np.empty((len(detections) + 1, 2), dtype=np.float32)
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([len(g) for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives
        for i, j in enumerate(np.argsort(-np.array([dd[1] for dd in detections]))):
            v, score, tube = detections[j]
            ispositive = False
            # if v == 'jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_le_bad_2' or v == 'jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_ri_bad_5' or v == 'jump/Sommerland_Syd_sprung_in_den_tod_jump_f_cm_np1_fr_bad_0':
            #     print()
            if v in gt:
                ious = [iou3dt(g, tube) for g in gt[v]]
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    del gt[v][amax]
                    if len(gt[v]) == 0:
                        del gt[v]
            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1
            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        # if fn != 0:
        print('Failed to detect {}/{} tubes for class {}'.format(fn, num_gt, ilabel))
        res[dataset.labels[ilabel]] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(res[label]) for label in dataset.labels])

    # ADDED: display individual class performance
    videoap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        videoap_percls[cls_name] = ap[cl]
    for key, value in videoap_percls.items():
        print(key, ':', value)

    videoap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} VideoAP_{}\n'.format(model_name, th))
        print('Task_{} VideoAP_{}\n'.format(opt.model_name, th))
        # for il, _ in enumerate(dataset.labels):
        #     print("{:20s} {:8.2f}".format('', ap[il]))
        #     log_file.write("{:20s} {:8.2f}\n".format('', ap[il]))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", videoap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", videoap_result))
    return videoap_result
def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    # ORIG
    vlist = dataset._test_videos[opt.split - 1]
    # vlist = dataset._train_videos[opt.split - 1]
    '''
    # ADDED: to analyze a specific class
    tar_class = 'wave'
    vlist_filt = []
    for vv in range(len(vlist)):
        cls_name, clip_name = vlist[vv].split('/')
        if cls_name == tar_class:
            vlist_filt.append(vlist[vv])
    vlist = vlist_filt
    '''

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname, 'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        if opt.inference_mode == 'stream':
            alldets = load_frame_detections_stream(opt, dataset, opt.K, vlist, inference_dirname)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist, inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print("OverflowError: cannot serialize a bytes object larger than 4 GiB")

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):  # e.g.: 0, 'brush_hair'
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]
            if ilabel not in tubes:
                continue
            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):  # for each frame
                    k = (iv, int(tube[i, 0]))  # video id, frame id
                    if k not in gt:  # if not yet added to gt
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # added: record of the original gt of a class (it won't be deleted or modified during evaluation)
        if opt.evaluation_mode == 'trimmed':
            gt_past = copy.deepcopy(gt)
            gt_keys_list = list(gt.keys())
            gt_vid = []
            for vv in gt_keys_list:
                if vv[0] in gt_vid:
                    continue
                gt_vid.append(vv[0])

        # pr will be an array containing precision-recall values
        # pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr = -1 * np.ones((detections.shape[0] + 1, 2), dtype=np.float32)  # precision, recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([g.shape[0] for g in gt.values()])  # false negatives
        # ALPHA: == number of frames (each frame has exactly 1 action instance)
        fp = 0  # false positives
        tp = 0  # true positives

        '''
        # Below may not be needed now if detection is conducted on all frames
        # ADDED: remove potential fn (when not evaluating all frames)?
        # Confirmed: can still be used when evaluating the whole set (at least for JHMDB)
        if opt.dataset == 'hmdb':
            num_tp = 0
            prev_k = (-1, -1)
            for ii, jj in enumerate(detections):
                k = (int(detections[ii, 0]), int(detections[ii, 1]))  # (video id, frame id)
                if k in gt and k != prev_k:
                    num_tp += 1
                prev_k = k
            fn = num_tp
        '''

        for i, j in enumerate(np.argsort(-detections[:, 3])):  # j: index of the det (highest to lowest score)
            k = (int(detections[j, 0]), int(detections[j, 1]))  # (video id, frame id)
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)
                    if gt[k].size == 0:
                        del gt[k]

            # untrimmed evaluation (for ucf24)
            # basically, when a frame is not in the non-modified gt list but its video id appears ...
            if opt.evaluation_mode == 'trimmed':
                if k[0] in gt_vid and not (k in gt_past):
                    continue

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            # ADDED: to avoid division by zero error; is it needed?
            if tp + fp == 0 or tp + fn == 0:
                continue
            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        pr_trimmed = pr[pr[:, 0] != -1]
        results[label] = pr_trimmed

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])

    # ADDED: display individual class performance
    frameap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        frameap_percls[cls_name] = ap[cl]
    for key, value in frameap_percls.items():
        print(key, ':', value)

    frameap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))
    return frameap_result
def BuildTubes(opt):
    redo = opt.redo
    if not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
    Dataset = get_dataset(opt.dataset)
    inference_dirname = opt.inference_dir
    K = opt.K
    split = 'val'
    dataset = Dataset(opt, split)
    print('inference_dirname is ', inference_dirname)
    vlist = dataset._test_videos[opt.split - 1]
    bar = Bar('{}'.format('BuildTubes'), max=len(vlist))

    # DEBUG: target a certain video / class to build tubes
    # 'shoot_ball/ImprovingBasketballSkills-BasketballTurnaroundFadeAway_shoot_ball_f_nm_np1_ri_med_0',
    # 'shoot_ball/KELVIN_shoot_ball_u_cm_np1_ba_med_0', 'shoot_ball/KELVIN_shoot_ball_u_cm_np1_ba_med_2',
    # target_video = ['jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_le_bad_2']

    for iv, v in enumerate(vlist):
        # if v not in target_video:
        #     continue
        outfile = os.path.join(inference_dirname, v + "_tubes.pkl")
        if os.path.isfile(outfile) and not redo:
            continue
        RES = {}
        nframes = dataset._nframes[v]

        # TODO: hardcoded for jhmdb for now
        if dataset._nframes[v] >= K * opt.ninput:
            ok_frame_inds = [16, 21, 26, 31, 36, dataset._nframes[v]]
        else:
            print('video: {}; Number of frames: {}'.format(v, dataset._nframes[v]))
            ok_frame_inds = [dataset._nframes[v] - opt.ninput + 1, dataset._nframes[v]]
        # ok_frame_inds = []
        # for kk in range(opt.K):
        #     ok_frame_inds.append(max(dataset._nframes[v] - kk * opt.ninput, 1))
        # ok_frame_inds.reverse()

        if opt.inference_mode == 'stream':
            # record the latest K frame indices (for allocating detections to the final frame)
            last_k_ind = []
            last_k_ind_init = min(K * opt.ninput, dataset._nframes[v]) - opt.ninput + 1
            last_k_ind.append(last_k_ind_init)
            for _ in range(opt.K - 1):
                last_k_ind_init = max(1, last_k_ind_init - opt.ninput)
                last_k_ind.append(last_k_ind_init)
            last_k_ind.reverse()

        # load detected tubelets
        VDets = {}
        # orig: for startframe in range(1, nframes + 2 - K):
        for startframe in range(min(K * opt.ninput, dataset._nframes[v]) - opt.ninput + 1,
                                1 + dataset._nframes[v]):
            if opt.inference_mode == 'stream':  # otherwise ignore
                if startframe not in ok_frame_inds:
                    continue
            if startframe != min(K * opt.ninput, dataset._nframes[v]) - opt.ninput + 1:  # not initial frame (ex: 16)
                if opt.inference_mode == 'stream':  # otherwise ignore
                    last_k_ind.append(startframe)
                    if len(last_k_ind) > opt.K:  # only keep the last K indices
                        del last_k_ind[0]
            resname = os.path.join(inference_dirname, v, "{:0>5}.pkl".format(startframe))
            if not os.path.isfile(resname):
                print("ERROR: Missing extracted tubelets " + resname)
                sys.exit()
            with open(resname, 'rb') as fid:
                VDets[startframe] = pickle.load(fid)

        # added: may not be correct but proceed with learning tube building
        first_endframe = list(VDets.keys())[0]

        for ilabel in range(len(dataset.labels)):
            FINISHED_TUBES = []
            CURRENT_TUBES = []  # tubes is a list of tuple (frame, lstubelets)

            # calculate average scores of tubelets in tubes
            def tubescore(tt):
                return np.mean(np.array([tt[i][1][-1] for i in range(len(tt))]))

            # a tube could contain multiple linked mini-tubes (linked over time); hence the for loop
            # orig: for frame in range(1, dataset._nframes[v] + 2 - K):
            for frame in range(min(K * opt.ninput, dataset._nframes[v]) - opt.ninput + 1,
                               1 + dataset._nframes[v]):
                # load boxes of the new frame and do nms while keeping Nkeep highest scored
                if opt.inference_mode == 'stream':
                    if frame not in ok_frame_inds:
                        continue
                ltubelets = VDets[frame][ilabel + 1]  # [:,range(4*K) + [4*K + 1 + ilabel]]  Nx(4K+1) with (x1 y1 x2 y2)*K ilabel-score
                ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

                # just start new tubes
                if frame == first_endframe:  # orig: 1
                    for i in range(ltubelets.shape[0]):
                        CURRENT_TUBES.append([(first_endframe, ltubelets[i, :])])  # orig: 1
                    continue

                # sort current tubes according to average score
                avgscore = [tubescore(t) for t in CURRENT_TUBES]
                argsort = np.argsort(-np.array(avgscore))
                CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]

                # loop over tubes
                finished = []
                for it, t in enumerate(CURRENT_TUBES):
                    # compute ious between the last box of t and ltubelets
                    # my interpretation: for each tube in the memory, associate it with any possible current tubelet
                    last_endframe, last_tubelet = t[-1]
                    # confusing -1? -> a tube could contain multiple linked mini-tubes (linked over time)
                    ious = []
                    offset = round((frame - last_endframe) / opt.ninput)  # orig: frame - last_endframe
                    if offset < K:
                        nov = K - offset  # number of overlaps
                        ious = sum([iou2d(ltubelets[:, 4 * iov:4 * iov + 4],
                                          last_tubelet[4 * (iov + offset):4 * (iov + offset + 1)])
                                    for iov in range(nov)]) / float(nov)
                    else:
                        ious = iou2d(ltubelets[:, :4], last_tubelet[4 * K - 4:4 * K])  # head and tail matching

                    valid = np.where(ious >= 0.5)[0]  # 0.5

                    if valid.size > 0:
                        # ONLY match the best QUERY tube to the database, then delete this query
                        # take the one with maximum score
                        idx = valid[np.argmax(ltubelets[valid, -1])]
                        CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                        ltubelets = np.delete(ltubelets, idx, axis=0)
                    else:
                        if offset >= opt.K:
                            finished.append(it)

                # finished tubes that are done
                for it in finished[::-1]:  # process in reverse order to delete them with the right index
                    FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                    del CURRENT_TUBES[it]

                # start new tubes
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(frame, ltubelets[i, :])])

            # all tubes are not finished
            FINISHED_TUBES += CURRENT_TUBES

            # build real tubes
            output = []
            for t_i, t in enumerate(FINISHED_TUBES):
                # DEBUG
                # print(t_i)
                score = tubescore(t)

                # just start new tubes
                if score < 0.005:
                    continue

                beginframe = max(t[0][0] - opt.ninput * (K - 1), 1)  # t[0][0]
                # TODO: needs to be taken care of (forward vs backward)
                endframe = t[-1][0]  # t[-1][0] + K - 1  # TODO
                length = endframe + 1 - beginframe

                # delete tubes with short duration (contributing to many fp?)
                # if length < min(dataset._nframes[v], (K * opt.ninput - opt.ninput + 1) + opt.ninput * 3):  # 15
                #     continue
                if length < dataset._nframes[v] // 2:  # 15
                    continue

                # build final tubes by averaging the tubelets
                out = np.zeros((length, 6), dtype=np.float32)
                out[:, 0] = np.arange(beginframe, endframe + 1)
                n_per_frame = np.zeros((length, 1), dtype=np.int32)  # orig: zeros
                for i in range(len(t)):
                    frame, box = t[i]  # frame: end frame of a tube
                    n_mem = K - 1
                    if opt.inference_mode == 'stream':  # for stream detection
                        if frame != nframes:
                            for k in range(K):
                                out[max(frame - k * opt.ninput, 1) - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                                out[max(frame - k * opt.ninput, 1) - beginframe, -1] += box[-1]
                                n_per_frame[max(frame - k * opt.ninput, 1) - beginframe, 0] += 1
                                n_mem -= 1
                        # for the last frame
                        else:
                            for k in reversed(range(0, K)):
                                out[last_k_ind[k] - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                                out[last_k_ind[k] - beginframe, -1] += box[-1]
                                n_per_frame[last_k_ind[k] - beginframe, 0] += 1
                                n_mem -= 1
                    else:
                        for k in range(K):
                            out[max(frame - k * opt.ninput, 1) - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                            out[max(frame - k * opt.ninput, 1) - beginframe, -1] += box[-1]
                            n_per_frame[max(frame - k * opt.ninput, 1) - beginframe, 0] += 1
                            n_mem -= 1
                '''
                orig
                for i in range(len(t)):
                    frame, box = t[i]
                    for k in range(K):
                        out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4]  # avg effect on the box coord; more stable?
                        out[frame - beginframe + k, -1] += box[-1]  # single frame confidence
                        n_per_frame[frame - beginframe + k, 0] += 1
                '''
                nonzero_ind = n_per_frame != 0  # sparse! would be dividing a lot of zeros
                out[nonzero_ind[:, 0], 1:] /= n_per_frame[nonzero_ind[:, 0]]
                # orig
                # out[:, 1:] /= n_per_frame

                if 0 in out[:, -1]:  # if any frame index contains 0 (meaning not filled! This line was creating issues!)
                    # print('Frame detection interpolation takes place!')
                    # ADDED: extrapolation?
                    nonzero_ind = np.where(nonzero_ind == True)[0]
                    nz_v_prev = -5
                    nonzero_nonlink = []
                    for nz_i, nz_v in enumerate(nonzero_ind):
                        nz_offset = nz_v - nz_v_prev
                        if nz_offset == 1:
                            nz_v_prev = nz_v
                            continue
                        if nz_i > 0:
                            nonzero_nonlink.append((nz_v_prev, nz_v))
                        nz_v_prev = nz_v

                    for idx, lo_hi in enumerate(nonzero_nonlink):
                        lo_hi_dist = lo_hi[1] - lo_hi[0]
                        lo_box = out[lo_hi[0], 1:]
                        hi_box = out[lo_hi[1], 1:]
                        score = (out[lo_hi[0], -1] + out[lo_hi[1], -1]) / 2.
                        diff_box = (hi_box - lo_box) / lo_hi_dist
                        for offset in range(1, lo_hi_dist):
                            if out[lo_hi[0] + offset, -1] == 0:  # if the cell was not filled before
                                out[lo_hi[0] + offset, 1:] = lo_box + offset * diff_box
                                out[lo_hi[0] + offset, -1] = score

                output.append([out, score])
                # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]
            RES[ilabel] = output  # RES{ilabel: [(out[length, 6], score)]}

        with open(outfile, 'wb') as fid:
            pickle.dump(RES, fid)
        Bar.suffix = '[{0}/{1}]:{2}|Tot: {total:} |ETA: {eta:} '.format(iv + 1, len(vlist), v,
                                                                        total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()
def BuildTubes(opt):
    redo = opt.redo
    if not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
    Dataset = get_dataset(opt.dataset)
    inference_dirname = opt.inference_dir
    K = opt.K
    split = 'val'
    dataset = Dataset(opt, split)
    print('inference_dirname is ', inference_dirname)
    vlist = dataset._test_videos[opt.split - 1]
    bar = Bar('{}'.format('BuildTubes'), max=len(vlist))
    for iv, v in enumerate(vlist):
        outfile = os.path.join(inference_dirname, v + "_tubes.pkl")
        if os.path.isfile(outfile) and not redo:
            continue
        RES = {}
        nframes = dataset._nframes[v]

        # load detected tubelets
        VDets = {}
        for startframe in range(1, nframes + 2 - K):
            resname = os.path.join(inference_dirname, v, "{:0>5}.pkl".format(startframe))
            if not os.path.isfile(resname):
                print("ERROR: Missing extracted tubelets " + resname)
                sys.exit()
            with open(resname, 'rb') as fid:
                VDets[startframe] = pickle.load(fid)

        for ilabel in range(len(dataset.labels)):
            FINISHED_TUBES = []
            CURRENT_TUBES = []  # tubes is a list of tuple (frame, lstubelets)

            # calculate average scores of tubelets in tubes
            def tubescore(tt):
                return np.mean(np.array([tt[i][1][-1] for i in range(len(tt))]))

            for frame in range(1, dataset._nframes[v] + 2 - K):
                # load boxes of the new frame and do nms while keeping Nkeep highest scored
                ltubelets = VDets[frame][ilabel + 1]  # [:,range(4*K) + [4*K + 1 + ilabel]]  Nx(4K+1) with (x1 y1 x2 y2)*K ilabel-score
                ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

                # just start new tubes
                if frame == 1:
                    for i in range(ltubelets.shape[0]):
                        CURRENT_TUBES.append([(1, ltubelets[i, :])])
                    continue

                # sort current tubes according to average score
                avgscore = [tubescore(t) for t in CURRENT_TUBES]
                argsort = np.argsort(-np.array(avgscore))
                CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]

                # loop over tubes
                finished = []
                for it, t in enumerate(CURRENT_TUBES):
                    # compute ious between the last box of t and ltubelets
                    last_frame, last_tubelet = t[-1]
                    ious = []
                    offset = frame - last_frame
                    if offset < K:
                        nov = K - offset
                        ious = sum([iou2d(ltubelets[:, 4 * iov:4 * iov + 4],
                                          last_tubelet[4 * (iov + offset):4 * (iov + offset + 1)])
                                    for iov in range(nov)]) / float(nov)
                    else:
                        ious = iou2d(ltubelets[:, :4], last_tubelet[4 * K - 4:4 * K])

                    valid = np.where(ious >= 0.5)[0]

                    if valid.size > 0:
                        # take the one with maximum score
                        idx = valid[np.argmax(ltubelets[valid, -1])]
                        CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                        ltubelets = np.delete(ltubelets, idx, axis=0)
                    else:
                        if offset >= opt.K:
                            finished.append(it)

                # finished tubes that are done
                for it in finished[::-1]:  # process in reverse order to delete them with the right index
                    FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                    del CURRENT_TUBES[it]

                # start new tubes
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(frame, ltubelets[i, :])])

            # all tubes are not finished
            FINISHED_TUBES += CURRENT_TUBES

            # build real tubes
            output = []
            for t in FINISHED_TUBES:
                score = tubescore(t)

                # just start new tubes
                if score < 0.005:
                    continue

                beginframe = t[0][0]
                endframe = t[-1][0] + K - 1
                length = endframe + 1 - beginframe

                # delete tubes with short duration
                if length < 15:
                    continue

                # build final tubes by averaging the tubelets
                out = np.zeros((length, 6), dtype=np.float32)
                out[:, 0] = np.arange(beginframe, endframe + 1)
                n_per_frame = np.zeros((length, 1), dtype=np.int32)
                for i in range(len(t)):
                    frame, box = t[i]
                    for k in range(K):
                        out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4]
                        out[frame - beginframe + k, -1] += box[-1]  # single frame confidence
                        n_per_frame[frame - beginframe + k, 0] += 1
                out[:, 1:] /= n_per_frame
                output.append([out, score])
                # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]
            RES[ilabel] = output  # RES{ilabel: [(out[length, 6], score)]}

        with open(outfile, 'wb') as fid:
            pickle.dump(RES, fid)
        Bar.suffix = '[{0}/{1}]:{2}|Tot: {total:} |ETA: {eta:} '.format(iv + 1, len(vlist), v,
                                                                        total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()
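
# Illustrative sketch (assumption, not the repository's nms_tubelets): BuildTubes above
# suppresses overlapping tubelets per frame with nms_tubelets(dets, 0.6, top_k=10), where
# dets is an (N, 4K+1) array of K stacked boxes plus a class score. A standard greedy NMS
# on the mean per-frame IoU, keeping at most top_k tubelets, looks roughly like this.
def _nms_tubelets_sketch(dets, overlap_thresh=0.6, top_k=10):
    if dets.shape[0] == 0:
        return dets
    K = (dets.shape[1] - 1) // 4
    order = np.argsort(-dets[:, -1])  # indices sorted by descending score
    keep = []
    while order.size > 0 and len(keep) < top_k:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        if rest.size == 0:
            break
        ious = np.zeros(rest.size, dtype=np.float32)
        for k in range(K):
            # IoU of the k-th box of tubelet i against the k-th boxes of the remaining tubelets
            x1 = np.maximum(dets[i, 4 * k], dets[rest, 4 * k])
            y1 = np.maximum(dets[i, 4 * k + 1], dets[rest, 4 * k + 1])
            x2 = np.minimum(dets[i, 4 * k + 2], dets[rest, 4 * k + 2])
            y2 = np.minimum(dets[i, 4 * k + 3], dets[rest, 4 * k + 3])
            inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
            area_i = (dets[i, 4 * k + 2] - dets[i, 4 * k]) * (dets[i, 4 * k + 3] - dets[i, 4 * k + 1])
            area_r = (dets[rest, 4 * k + 2] - dets[rest, 4 * k]) * (dets[rest, 4 * k + 3] - dets[rest, 4 * k + 1])
            ious += inter / (area_i + area_r - inter + 1e-9)
        ious /= K  # mean IoU over the K frames
        order = rest[ious < overlap_thresh]
    return dets[keep]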