parser.add_argument( "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)", ) parser.add_argument("--gpus", nargs="+", type=int, default=None) parser.add_argument("--flow_pref", type=str, default="") parser.add_argument("--use_reference", default=False, action="store_true") parser.add_argument("--use_kinetics_reference", default=False, action="store_true") args = parser.parse_args() dataset_configs = get_actionness_configs(args.dataset) num_class = dataset_configs["num_class"] if args.dataset == "thumos14": if args.subset == "validation": test_prop_file = "data/{}_proposal_list.txt".format( dataset_configs["train_list"] ) elif args.subset == "testing": test_prop_file = "data/{}_proposal_list.txt".format( dataset_configs["test_list"] ) elif args.dataset == "activitynet1.2": if args.subset == "training": test_prop_file = "data/{}_proposal_list.txt".format( dataset_configs["train_list"]
def main():
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs["sampling"]
    num_class = dataset_configs["num_class"]
    # override any command-line value: this stage always trains with 0.8 dropout
    args.dropout = 0.8

    if args.modality == "RGB":
        data_length = 1
    elif args.modality in ["Flow", "RGBDiff"]:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    model = BinaryClassifier(
        num_class,
        args.num_body_segments,
        args.modality,
        new_length=data_length,
        base_model=args.arch,
        dropout=args.dropout,
        bn_mode=args.bn_mode,
    )

    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weights from '{}'".format(
                args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd["state_dict"])
            print(("=> loaded pretrained weights from '{}'".format(
                args.init_weights)))
        else:
            print(("=> no weights file found at '{}'".format(
                args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs["kinetics_pretrain"][args.arch][
            args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)["state_dict"])
        print(("=> loaded init weights from '{}'".format(model_url)))
    else:
        # standard ImageNet pretraining; only Flow needs extra init weights
        if args.modality == "Flow":
            model_url = dataset_configs["flow_init"][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)["state_dict"])
            print(("=> loaded flow init weights from '{}'".format(model_url)))

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True
    pin_memory = args.modality == "RGB"

    # Data loading code
    if args.modality != "RGBDiff":
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["train_list"])
    val_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["test_list"])

    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            train_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_pref + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR input in [0, 255],
                # hence roll=True and div=False for those backbones
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            val_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_pref + "{}_{:05d}.jpg",
            random_shift=False,
            fg_ratio=6,
            bg_ratio=6,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
    )

    binary_criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(("group: {} has {} params, lr_mult: {}, decay_mult: {}".format(
            group["name"], len(group["params"]), group["lr_mult"],
            group["decay_mult"],
        )))
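    # get_optim_policies() is assumed to return TSN-style parameter groups
    # carrying exactly the keys printed above; a sketch of the expected shape
    # (group names and multipliers illustrative):
    #
    #   [{"params": [...], "name": "first_conv_weight",
    #     "lr_mult": 1, "decay_mult": 1},
    #    {"params": [...], "name": "normal_bias",
    #     "lr_mult": 2, "decay_mult": 0}, ...]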
group["decay_mult"], ))) optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch, args.lr_steps) # train for one epoch train(train_loader, model, binary_criterion, optimizer, epoch) # evaluate on validation list if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1: loss = validate(val_loader, model, binary_criterion, (epoch + 1) * len(train_loader)) # remember best prec@1 and save checkpoint is_best = loss < best_loss best_loss = min(loss, best_loss) save_checkpoint( { "epoch": epoch + 1, "arch": args.arch, "state_dict": model.state_dict(), "best_loss": best_loss, }, is_best, )
def main():
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs['sampling']
    num_class = dataset_configs['num_class']
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    db = ANetDB.get_db("1.3")

    # set the input feature dimension according to the RGB feature extractor
    if args.feat_model in ('i3d_rgb', 'i3d_rgb_trained'):
        args.input_dim = 1024
    elif args.feat_model in ('inception_resnet_v2', 'inception_resnet_v2_trained'):
        args.input_dim = 1536
    if args.use_flow:
        if not args.only_flow:
            # RGB and flow features are concatenated
            args.input_dim += 1024
        else:
            args.input_dim = 1024
    print(("=> the input features are extracted from '{}' and the dim is '{}'"
           ).format(args.feat_model, args.input_dim))

    # optionally reduce the input feature dimension first; either way the
    # model dimension must divide evenly across the attention heads
    if args.reduce_dim > 0:
        assert args.reduce_dim % args.n_head == 0, \
            "reduce_dim {} % n_head {} != 0".format(args.reduce_dim, args.n_head)
        args.d_k = int(args.reduce_dim // args.n_head)
        args.d_v = args.d_k
    else:
        assert args.input_dim % args.n_head == 0, \
            "input_dim {} % n_head {} != 0".format(args.input_dim, args.n_head)
        args.d_k = int(args.input_dim // args.n_head)
        args.d_v = args.d_k
    args.d_model = args.n_head * args.d_k

    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)
    # encode the main hyper-parameters into the checkpoint directory name
    save_path = os.path.join(
        args.result_path,
        '_'.join((args.att_kernel_type, 'N' + str(args.n_layers))))
    if not args.pos_enc:
        save_path += '_nopos'
    if args.num_local > 0:
        save_path = save_path + '_loc' + str(args.num_local) + args.local_type
    if args.dilated_mask:
        save_path += '_dilated'
    if args.groupwise_heads > 0:
        save_path = save_path + '_G' + str(args.groupwise_heads)
    if len(args.roi_poolsize) > 0:
        save_path = save_path + '_roi' + str(args.roi_poolsize)
    model_name = os.path.split(save_path)[1]
    # logger = Logger('./logs/{}'.format(model_name))
    logger = None

    model = BinaryClassifier(num_class,
                             args.num_body_segments,
                             args,
                             dropout=args.dropout)
    model = torch.nn.DataParallel(model, device_ids=None).cuda()
    cudnn.enabled = False
    # cudnn.benchmark = True
    pin_memory = True

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    train_videos = db.get_subset_videos('training')
    val_videos = db.get_subset_videos('validation')

    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root,
                      args.feat_model,
                      train_prop_file,
                      train_videos,
                      exclude_empty=True,
                      body_seg=args.num_body_segments,
                      input_dim=args.d_model,
                      prop_per_video=args.prop_per_video,
                      fg_ratio=6,
                      bg_ratio=6,
                      num_local=args.num_local,
                      use_flow=args.use_flow,
                      only_flow=args.only_flow),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)

    # val_loader = torch.utils.data.DataLoader(
    #     BinaryDataSet(args.feat_root, args.feat_model, val_prop_file, val_videos,
    #                   exclude_empty=True, body_seg=args.num_body_segments,
    #                   input_dim=args.d_model, prop_per_video=args.prop_per_video,
    #                   fg_ratio=6, bg_ratio=6, num_local=args.num_local,
    #                   use_flow=args.use_flow, only_flow=args.only_flow),
    #     batch_size=args.batch_size//2, shuffle=False,
    #     num_workers=args.workers*2, pin_memory=pin_memory)

    # validation runs one whole video at a time (test_mode=True), hence batch_size=1
    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root,
                      args.feat_model,
                      val_prop_file,
                      subset_videos=val_videos,
                      exclude_empty=True,
                      body_seg=args.num_body_segments,
                      input_dim=args.d_model,
                      test_mode=True,
                      use_flow=args.use_flow,
                      verbose=False,
                      num_local=args.num_local,
                      only_flow=args.only_flow),
        batch_size=1,
        shuffle=False,
        num_workers=10,
        pin_memory=True)
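    # grd_activity() is assumed to load the ActivityNet v1.3 ground truth for
    # the chosen subset and return it together with a class-name -> index
    # mapping (illustratively {'background': 0, 'Archery': 1, ...});
    # 'background' is deleted below because this binary stage only scores
    # actionness, not action classes.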
    ground_truth, cls_to_idx = grd_activity(
        'data/activity_net.v1-3.min_save.json', subset='validation')
    del cls_to_idx['background']

    # optimizer = torch.optim.Adam(
    #     model.parameters(),
    #     args.lr, weight_decay=args.weight_decay)
    optimizer = AdamW(model.parameters(),
                      args.lr,
                      weight_decay=args.weight_decay)
    # optimizer = torch.optim.SGD(model.parameters(),
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay, nesterov=False)

    if args.resume is not None and len(args.resume) > 0:
        model.load_state_dict(torch.load(args.resume)['state_dict'],
                              strict=False)

    criterion_stage1 = CE_Criterion_multi(use_weight=True)
    criterion_stage2 = Rank_Criterion(epsilon=0.02)

    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # stop early once validation has failed to improve for more than
        # 5 consecutive evaluations
        if patience > 5:
            break

        # train for one epoch
        train(train_loader, model, optimizer, criterion_stage1,
              criterion_stage2, epoch, logger)

        # evaluate on validation list
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, ground_truth,
                            (epoch + 1) * len(train_loader), epoch)

            # remember the best (lowest) validation loss and save a checkpoint;
            # the loss must improve by a relative margin of ~0.01% to count as
            # a new best and reset the patience counter
            is_best = 1.0001 * loss < best_loss
            if is_best:
                patience = 0
            else:
                patience += 1
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                }, is_best, save_path)
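# save_checkpoint() is defined elsewhere in this repo. From the 3-argument call
# above it is assumed to write the state under save_path and keep a copy of the
# best model; a minimal sketch under that assumption (file names illustrative):
#
# import shutil
#
# def save_checkpoint(state, is_best, save_path):
#     filename = save_path + '_checkpoint.pth.tar'
#     torch.save(state, filename)
#     if is_best:
#         shutil.copyfile(filename, save_path + '_best.pth.tar')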