# --- Evaluation CLI: detection-result scoring options --------------------
# NOTE(review): this chunk starts mid-script; `parser`, `get_configs`,
# `evaluate`, and args such as `dataset` / `nms_threshold` /
# `detection_pickles` are defined in code above this view.
parser.add_argument('--no_regression', default=False, action="store_true")
parser.add_argument('--softmax_before_filter', default=False, action="store_true")
parser.add_argument('-j', '--ap_workers', type=int, default=32)  # parallel workers for AP computation
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
parser.add_argument('--score_weights', type=float, default=None, nargs='+')
parser.add_argument('--externel_score', type=str,
                    default='test_gt_score_combined_refined_fusion')

args = parser.parse_args()

dataset_configs = get_configs(args.dataset)
num_class = dataset_configs['num_class']
test_prop_file = 'data/{}_proposal_list.txt'.format(
    dataset_configs['test_list'])
# Tell the evaluation module how many classes to score.
evaluate.number_label = num_class

# CLI values override the dataset config.
# NOTE(review): these are truthiness tests, so an explicit
# `--nms_threshold 0` / `--top_k 0` / absent flag falls back to the config
# value — confirm that is intended.
nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs[
    'evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
softmax_bf = args.softmax_before_filter \
    if args.softmax_before_filter else dataset_configs['evaluation']['softmax_before_filter']

print("initiating evaluation of detection results {}".format(
    args.detection_pickles))

# Load every detection pickle listed on the command line.
# (loop body continues beyond this chunk)
score_pickle_list = []
for pc in args.detection_pickles:
# --- Test-time CLI and dataset configuration -----------------------------
# This chunk starts mid-script; `parser`, `get_configs`, and `args.dataset`
# / `args.modality` come from code above this view.
parser.add_argument('--frame_interval', type=int, default=6)
parser.add_argument('--test_batchsize', type=int, default=512)
parser.add_argument('--no_regression', action="store_true", default=False)
parser.add_argument('--max_num', type=int, default=-1)
parser.add_argument('--test_crops', type=int, default=10)
parser.add_argument('--input_size', type=int, default=224)
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--gpus', nargs='+', type=int, default=None)
parser.add_argument('--flow_pref', type=str, default='')
parser.add_argument('--use_reference', default=False, action='store_true')
parser.add_argument('--use_kinetics_reference', default=False,
                    action='store_true')

args = parser.parse_args()

dataset_configs = get_configs(args.dataset)
num_class = dataset_configs['num_class']
stpp_configs = tuple(dataset_configs['stpp'])
test_prop_file = 'data/{}_proposal_list.txt'.format(
    dataset_configs['test_list'])

# Number of consecutive frames fed per sample for each input modality.
_modality_lengths = {'RGB': 1, 'Flow': 5, 'RGBDiff': 5}
if args.modality not in _modality_lengths:
    raise ValueError("unknown modality {}".format(args.modality))
data_length = _modality_lengths[args.modality]

# Default to all 8 GPUs when none were requested explicitly.
gpu_list = range(8) if args.gpus is None else args.gpus
def main():
    """Train an SSN temporal action detection model.

    Builds the network from the dataset/CLI configuration, restores
    pretrained or checkpointed weights, constructs the train/val data
    loaders, then runs the optimisation loop with periodic validation and
    checkpointing.  Reads the module-level ``parser`` and mutates the
    module-level ``args`` / ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)
    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    sampling_configs = dataset_configs['sampling']

    # Augmentation segments flank the body segments on both sides, so the
    # same count is passed for the starting and ending component.
    model = SSN(num_class, args.num_aug_segments, args.num_body_segments,
                args.num_aug_segments, args.modality,
                base_model=args.arch, dropout=args.dropout,
                stpp_cfg=stpp_configs, bn_mode=args.bn_mode)

    # Weight initialisation priority: explicit file > Kinetics pretrain >
    # (for Flow) a flow-specific init; RGB otherwise keeps the base model's
    # own ImageNet initialisation.
    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weights '{}'".format(
                args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd['state_dict'])
            print(("=> loaded init weights from '{}'".format(
                args.init_weights)))
        else:
            print(("=> no weights file found at '{}'".format(
                args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs['kinetics_pretrain'][args.arch][
            args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)['state_dict'])
        print(("=> loaded init weights from '{}'".format(model_url)))
    else:  # standard ImageNet pretraining
        if args.modality == 'Flow':
            model_url = dataset_configs['flow_init'][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)['state_dict'])
            print(("=> loaded flow init weights from '{}'".format(model_url)))

    # Capture backbone-specific preprocessing parameters before the model
    # is wrapped in DataParallel (the wrapper hides these attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated args.evaluate
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    # Pinned host memory only pays off for the small single-frame RGB input.
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is already a difference signal; no mean/std normalisation.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['val_list'])

    train_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            train_prop_file,
            epoch_multiplier=args.training_epoch_multiplier,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR channel order (roll)
                # and 0-255 inputs (no division).
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses the regression statistics computed on the train set.
    val_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            val_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            reg_stats=train_loader.dataset.stats),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory)

    # SSN's three loss heads: activity classification, completeness
    # ranking, and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # Evaluation-only mode: one validation pass, then exit.
        validate(val_loader, model, activity_criterion,
                 completeness_criterion, regression_criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion,
              completeness_criterion, regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'reg_stats': torch.from_numpy(train_loader.dataset.stats)
                },
                is_best,
                foldername=args.save_path,
                filename="checkpoint_{}.pth".format(epoch))
            print('======================================================')
            print(epoch, is_best, loss, best_loss)
            print('======================================================')
def main():
    # Train an SSN temporal action detection model: build the network,
    # restore weights, create the data loaders, then run the train /
    # validate / checkpoint loop.  Mutates module-level `args` and
    # `best_loss`.
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)
    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    sampling_configs = dataset_configs['sampling']

    # Augmentation segments flank the body segments on both sides, hence
    # args.num_aug_segments is passed for both the start and end component.
    model = SSN(num_class, args.num_aug_segments, args.num_body_segments,
                args.num_aug_segments, args.modality,
                base_model=args.arch, dropout=args.dropout,
                stpp_cfg=stpp_configs, bn_mode=args.bn_mode)

    # Initialisation priority: explicit weights file > Kinetics pretrain >
    # (Flow only) flow-specific init.
    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weigths '{}'".format(args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd['state_dict'])
            print(("=> loaded init weights from '{}'".format(args.init_weights)))
        else:
            print(("=> no weights file found at '{}'".format(args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs['kinetics_pretrain'][args.arch][args.modality]
        model.base_model.load_state_dict(model_zoo.load_url(model_url)['state_dict'])
        print(("=> loaded init weights from '{}'".format(model_url)))
    else:
        # standard ImageNet pretraining
        if args.modality == 'Flow':
            model_url = dataset_configs['flow_init'][args.arch]
            model.base_model.load_state_dict(model_zoo.load_url(model_url)['state_dict'])
            print(("=> loaded flow init weights from '{}'".format(model_url)))

    # Grab backbone-specific preprocessing parameters before DataParallel
    # wraps (and hides) these attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): prints args.evaluate although args.resume was
            # loaded — looks like a copy/paste slip; confirm.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is already a difference signal; skip normalisation.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['train_list'])
    # NOTE(review): validation proposals come from 'test_list' here (the
    # sibling training script uses 'val_list') — confirm intended.
    val_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])

    train_loader = torch.utils.data.DataLoader(
        SSNDataSet("", train_prop_file,
                   epoch_multiplier=args.training_epoch_multiplier,
                   new_length=data_length,
                   modality=args.modality,
                   exclude_empty=True,
                   **sampling_configs,
                   aug_seg=args.num_aug_segments,
                   body_seg=args.num_body_segments,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
                   else args.flow_prefix + "{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception/InceptionV3 expect BGR order (roll)
                       # and 0-255 inputs (no division).
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses regression statistics computed on the train set.
    val_loader = torch.utils.data.DataLoader(
        SSNDataSet("", val_prop_file,
                   new_length=data_length,
                   modality=args.modality,
                   exclude_empty=True,
                   **sampling_configs,
                   aug_seg=args.num_aug_segments,
                   body_seg=args.num_body_segments,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
                   else args.flow_prefix + "{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ]),
                   reg_stats=train_loader.dataset.stats),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=pin_memory)

    # SSN's three loss heads: activity classification, completeness
    # ranking, and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # Evaluation-only mode: single validation pass, then exit.
        validate(val_loader, model, activity_criterion, completeness_criterion,
                 regression_criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion, completeness_criterion,
              regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'reg_stats': torch.from_numpy(train_loader.dataset.stats)
            }, is_best)
# --- Evaluation CLI (detection results) ----------------------------------
# NOTE(review): this chunk starts mid-script; `parser`, `get_configs`,
# `time`, `sys`, and args such as `dataset` / `nms_threshold` /
# `detection_pickles` come from code above this view.
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, nargs='+')
parser.add_argument('--reg_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
parser.add_argument('--cfg', default='data/dataset_cfg.yml')
parser.add_argument('--score_weights', type=float, default=None, nargs='+')
parser.add_argument('--min_length', type=float, default=None,
                    help='minimum duration of proposals in second')
parser.add_argument('--one_iou', action='store_true')
parser.add_argument('--no_comp', action='store_true')

args = parser.parse_args()

configs = get_configs(args.dataset, args.cfg)
dataset_configs = configs['dataset_configs']
model_configs = configs["model_configs"]
num_class = model_configs['num_class']

# CLI values override the config file.  BUG FIX: compare against None
# instead of truthiness, so an explicit `--nms_threshold 0` / `--top_k 0`
# is honoured rather than silently replaced by the config value (both
# options default to None, so the fallback behaviour is unchanged).
nms_threshold = (args.nms_threshold if args.nms_threshold is not None
                 else configs['evaluation']['nms_threshold'])
top_k = args.top_k if args.top_k is not None else configs['evaluation']['top_k']

print('---' * 10)
print(time.strftime('%Y-%m-%d %H:%M:%S'))
print("initiating evaluation of detection results {}".format(
    args.detection_pickles))
print('top_k={}'.format(top_k))
sys.stdout.flush()
"""Generate the proposal lists used for SSN training.

Invoke with a dataset name and the root directory that holds the
extracted video frames.
"""
import argparse
import os

from ops.io import process_proposal_list, parse_directory
from ops.utils import get_configs

parser = argparse.ArgumentParser(
    description="Generate proposal list to be used for training")
parser.add_argument('dataset', type=str,
                    choices=['activitynet1.2', 'thumos14'])
parser.add_argument('frame_path', type=str)
args = parser.parse_args()

# Dataset-specific configuration.
configs = get_configs(args.dataset)

norm_list_tmpl = 'data/{}_normalized_proposal_list.txt'
out_list_tmpl = 'data/{}_proposal_list.txt'

# key_func maps a frame-folder path to the video identifier it belongs to.
if args.dataset == 'activitynet1.2':
    def key_func(path):
        # ActivityNet ids are the trailing 11 characters of the path.
        return path[-11:]
elif args.dataset == 'thumos14':
    def key_func(path):
        # THUMOS uses the final path component (the video name).
        return path.split('/')[-1]
else:
    raise ValueError("unknown dataset {}".format(args.dataset))

# parse the folders holding the extracted frames to learn how many frames
# each video actually has on this machine
frame_dict = parse_directory(args.frame_path, key_func=key_func)
# Frame counts differ between extraction setups, so the authors ship a
# normalized proposal list; it is converted to match the real counts
# gathered above.
# --- Evaluation CLI ------------------------------------------------------
# NOTE(review): this chunk starts mid-script; `parser`, `get_configs`,
# `pickle`, and args such as `dataset` / `yaml_file` / `mode` /
# `detection_pickles` come from code above this view.
parser.add_argument('--nms_threshold', type=float, default=0.32)
parser.add_argument('--no_regression', default=False, action="store_true")
parser.add_argument('-j', '--ap_workers', type=int, default=32)
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
# BUG FIX: with nargs='+', argparse validates EVERY element of the parsed
# list against `choices`; choices=[None, [1.2, 1]] therefore rejected any
# user-supplied --score_weights (a float is never equal to None or to a
# list).  Drop the bogus `choices`; the default keeps prior behaviour.
parser.add_argument('--score_weights', type=float, default=[1.2, 1], nargs='+')

args = parser.parse_args()

configs = get_configs(args.dataset, args.yaml_file.format(args.mode))
dataset_configs = configs['dataset_configs']
model_configs = configs["model_configs"]
graph_configs = configs["graph_configs"]
num_class = model_configs['num_class']

# CLI overrides the config; NOTE(review): truthiness test means an explicit
# `--nms_threshold 0` / `--top_k 0` falls back to the config value.
nms_threshold = args.nms_threshold if args.nms_threshold else configs[
    'evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else configs['evaluation']['top_k']

print("initiating evaluation of detection results {}".format(
    args.detection_pickles))

# Load every detection pickle listed on the command line.
# NOTE(review): the file handle from open() is never closed explicitly —
# consider a `with` block (loop body may continue beyond this chunk).
score_pickle_list = []
for pc in args.detection_pickles:
    score_pickle_list.append(pickle.load(open(pc.format(args.mode), 'rb')))
    # with open(pc+'.json', 'r') as fobj:
def main():
    # Experiment variant of the SSN training entry point: the backbone is
    # pinned to P3D and initial weights are loaded from a hard-coded
    # checkpoint file instead of the usual pretrain options.
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)
    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    #TODO
    sampling_configs = dataset_configs['sampling']

    # NOTE(review): base_model is pinned to 'p3d', yet the transform
    # settings below still branch on args.arch, and the checkpoint name
    # mentions BNInception — confirm this combination is intended.
    base_model = 'p3d'
    model = SSN(num_class, args.num_aug_segments, args.num_body_segments,
                args.num_aug_segments, args.modality,
                base_model=base_model, dropout=args.dropout,
                stpp_cfg=stpp_configs, bn_mode=args.bn_mode)

    # Hard-coded checkpoint; keys have their first dotted component
    # (presumably DataParallel's 'module.' prefix — confirm) stripped
    # before loading.
    weights_file = 'ssn_activitynet1.2_BNInception_rgb_epoch-2_checkpoint.pth.tar'
    weights = torch.load(weights_file)['state_dict']
    weights = {'.'.join(k.split('.')[1:]): v for k, v in list(weights.items())}
    model.load_state_dict(weights)

    # Grab backbone-specific preprocessing parameters before DataParallel
    # wraps (and hides) these attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is already a difference signal; skip normalisation.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    # NOTE(review): validation proposals come from 'test_list' — confirm.
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])

    train_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            train_prop_file,
            epoch_multiplier=args.training_epoch_multiplier,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            # Frames here are named plain "00001.jpg" (no "img_" prefix).
            image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR order (roll) and
                # 0-255 inputs (no division).
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses regression statistics computed on the train set.
    val_loader = torch.utils.data.DataLoader(SSNDataSet(
        "",
        val_prop_file,
        new_length=data_length,
        modality=args.modality,
        exclude_empty=True,
        **sampling_configs,
        aug_seg=args.num_aug_segments,
        body_seg=args.num_body_segments,
        image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        reg_stats=train_loader.dataset.stats),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory)

    # SSN's three loss heads: activity classification, completeness
    # ranking, and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # Evaluation-only mode: single validation pass, then exit.
        validate(val_loader, model, activity_criterion, completeness_criterion,
                 regression_criterion, 0)
        return
        # exit()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion, completeness_criterion,
              regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'reg_stats': torch.from_numpy(train_loader.dataset.stats)
                }, is_best)