def runner_func(dataset, state_dict, gpu_id, index_queue, result_queue):
    torch.cuda.set_device(gpu_id)
    net = BinaryClassifier(num_class, 5, args.modality, test_mode=True,
                           new_length=data_length, base_model=args.arch)
    net.load_state_dict(state_dict)
    net.prepare_test_fc()
    net.eval()
    net.cuda()
    output_dim = net.test_fc.out_features

    while True:
        index = index_queue.get()
        frames_gen, frame_cnt = dataset[index]

        num_crop = args.test_crops
        # channels per snippet: 3 for RGB, 10 for 5 stacked flow (x, y)
        # pairs, 18 for RGBDiff (base frame plus 5 difference images)
        length = 3
        if args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18

        output = torch.zeros((frame_cnt, num_crop, output_dim)).cuda()
        cnt = 0
        for frames in frames_gen:
            # Variable(..., volatile=True) predates PyTorch 0.4; run
            # inference under torch.no_grad() instead
            with torch.no_grad():
                input_var = frames.view(-1, length, frames.size(-2),
                                        frames.size(-1)).cuda()
                rst, _ = net(input_var, None)
            sc = rst.data.view(-1, num_crop, output_dim)
            output[cnt:cnt + sc.size(0), :, :] = sc
            cnt += sc.size(0)

        if hasattr(dataset, 'video_list'):
            result_queue.put((dataset.video_list[index].id.split('/')[-1],
                              output.cpu().numpy()))
        elif hasattr(dataset, 'video_dict'):
            # dict views are not indexable in Python 3; materialize first
            result_queue.put(
                (list(dataset.video_dict.keys())[index].split('/')[-1],
                 output.cpu().numpy()))
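# A minimal sketch (not part of the original file) of the producer side that
# drives runner_func: one spawned process per worker slot, fed video indices
# through a shared index queue and drained through a shared result queue.
# `gpu_list` and `num_workers` are assumed names for whatever the caller has
# prepared; only runner_func's signature above is taken from this file.
# Assumes `import multiprocessing`.
def example_dispatch(dataset, state_dict, gpu_list, num_workers):
    ctx = multiprocessing.get_context('spawn')  # CUDA requires spawn, not fork
    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    workers = [ctx.Process(target=runner_func,
                           args=(dataset, state_dict,
                                 gpu_list[i % len(gpu_list)],
                                 index_queue, result_queue))
               for i in range(num_workers)]
    for w in workers:
        w.daemon = True  # the workers loop forever; let them die with the parent
        w.start()
    for i in range(len(dataset)):
        index_queue.put(i)
    # one result per video, in completion (not submission) order
    return [result_queue.get() for _ in range(len(dataset))]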
def runner_func(dataset, state_dict, gpu_id, index_queue, result_queue):
    torch.cuda.set_device(gpu_id)
    net = BinaryClassifier(num_class, args.num_body_segments, args,
                           dropout=args.dropout, test_mode=True)
    net.load_state_dict(state_dict)
    net.eval()
    net.cuda()

    while True:
        index = index_queue.get()
        feature, feature_mask, num_feat, pos_ind, video_id, _ = dataset[index]
        feature = feature.cuda()
        feature_mask = feature_mask.cuda()
        pos_ind = pos_ind.cuda()

        with torch.no_grad():
            rois, actness, roi_scores = net(feature, pos_ind,
                                            feature_mask=feature_mask,
                                            test_mode=True)
        rois = rois[0].cpu().numpy()
        actness = actness[0].cpu().numpy()
        # keep only the positive-class column of the ROI scores
        roi_scores = roi_scores[0].cpu().numpy()[:, 1]

        outputs = [rois, actness, roi_scores, num_feat]
        result_queue.put(
            (dataset.video_list[index].id.split('/')[-1], outputs))
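# A small companion sketch (assumed, not from the original file): draining
# result_queue after the feature-based runner_func above. Each queue item is
# (video_id, [rois, actness, roi_scores, num_feat]).
def example_collect(result_queue, num_videos):
    results = {}
    for _ in range(num_videos):
        video_id, outputs = result_queue.get()
        results[video_id] = outputs
    return results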
def main():
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs["sampling"]
    num_class = dataset_configs["num_class"]
    args.dropout = 0.8

    # snippet length per modality: a single RGB frame, or a stack of 5
    # flow / RGBDiff frames
    if args.modality == "RGB":
        data_length = 1
    elif args.modality in ["Flow", "RGBDiff"]:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    model = BinaryClassifier(
        num_class,
        args.num_body_segments,
        args.modality,
        new_length=data_length,
        base_model=args.arch,
        dropout=args.dropout,
        bn_mode=args.bn_mode,
    )

    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print("=> loading pretrained weights from '{}'".format(
                args.init_weights))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd["state_dict"])
            print("=> loaded pretrained weights from '{}'".format(
                args.init_weights))
        else:
            print("=> no weights file found at '{}'".format(args.init_weights))
    elif args.kinetics_pretrain:
        model_url = dataset_configs["kinetics_pretrain"][args.arch][args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)["state_dict"])
        print("=> loaded init weights from '{}'".format(model_url))
    else:
        # standard ImageNet pretraining; flow needs its own init weights
        if args.modality == "Flow":
            model_url = dataset_configs["flow_init"][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)["state_dict"])
            print("=> loaded flow init weights from '{}'".format(model_url))

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True
    pin_memory = args.modality == "RGB"

    # Data loading code
    if args.modality != "RGBDiff":
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["train_list"])
    val_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["test_list"])

    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            train_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            val_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            fg_ratio=6,
            bg_ratio=6,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
    )

    binary_criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print("group: {} has {} params, lr_mult: {}, decay_mult: {}".format(
            group["name"], len(group["params"]), group["lr_mult"],
group["decay_mult"], ))) optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch, args.lr_steps) # train for one epoch train(train_loader, model, binary_criterion, optimizer, epoch) # evaluate on validation list if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1: loss = validate(val_loader, model, binary_criterion, (epoch + 1) * len(train_loader)) # remember best prec@1 and save checkpoint is_best = loss < best_loss best_loss = min(loss, best_loss) save_checkpoint( { "epoch": epoch + 1, "arch": args.arch, "state_dict": model.state_dict(), "best_loss": best_loss, }, is_best, )
                volatile=True,
            )
            rst, _ = net(input_var, None)
            sc = rst.data.view(-1, num_crop, output_dim)
            output[cnt : cnt + sc.size(0), :, :] = sc
            cnt += sc.size(0)

        result_queue.put(
            (dataset.video_list[index].id.split("/")[-1], output.cpu().numpy())
        )


if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")

    net = BinaryClassifier(num_class, 5, args.modality, base_model=args.arch)

    if args.test_crops == 1:
        # scale the shorter side, then center-crop to the network input size
        cropping = torchvision.transforms.Compose(
            [GroupScale(net.scale_size), GroupCenterCrop(net.input_size)]
        )
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)]
        )
    else:
        raise ValueError(
            "only 1 and 10 crops are supported, but got {}".format(args.test_crops)
        )

    if not args.use_reference and not args.use_kinetics_reference:
                                        feature_mask=feature_mask,
                                        test_mode=True)
        rois = rois[0].cpu().numpy()
        actness = actness[0].cpu().numpy()
        # keep only the positive-class column of the ROI scores
        roi_scores = roi_scores[0].cpu().numpy()[:, 1]
        outputs = [rois, actness, roi_scores, num_feat]
        result[video_id] = outputs
    return result


if __name__ == '__main__':
    ctx = multiprocessing.get_context('spawn')

    net = BinaryClassifier(num_class, args.num_body_segments, args,
                           dropout=args.dropout, test_mode=True)
    checkpoint = torch.load(args.weights)
    print("model epoch {} loss: {}".format(checkpoint['epoch'],
                                           checkpoint['best_loss']))
    # strip the 'module.' prefix that DataParallel prepends to parameter
    # names, so the weights load into the bare (unwrapped) network
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }

    db = ANetDB.get_db("1.3")
    val_videos = db.get_subset_videos(args.subset)

    loader = torch.utils.data.DataLoader(BinaryDataSet(
def main():
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs['sampling']
    num_class = dataset_configs['num_class']

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    db = ANetDB.get_db("1.3")

    # set the input feature dimension according to the feature extractor
    if args.feat_model in ('i3d_rgb', 'i3d_rgb_trained'):
        args.input_dim = 1024
    elif args.feat_model in ('inception_resnet_v2',
                             'inception_resnet_v2_trained'):
        args.input_dim = 1536
    if args.use_flow:
        if not args.only_flow:
            args.input_dim += 1024
        else:
            args.input_dim = 1024
    print("=> the input features are extracted from '{}' and the dim is '{}'"
          .format(args.feat_model, args.input_dim))

    # optionally reduce the input feature dimension first; either way each
    # attention head gets d_k = d_v = d_model / n_head dimensions
    if args.reduce_dim > 0:
        assert args.reduce_dim % args.n_head == 0, \
            'reduce_dim {} % n_head {} != 0'.format(args.reduce_dim, args.n_head)
        args.d_k = int(args.reduce_dim // args.n_head)
    else:
        assert args.input_dim % args.n_head == 0, \
            'input_dim {} % n_head {} != 0'.format(args.input_dim, args.n_head)
        args.d_k = int(args.input_dim // args.n_head)
    args.d_v = args.d_k
    args.d_model = args.n_head * args.d_k

    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)

    # encode the main hyper-parameters into the checkpoint directory name
    save_path = os.path.join(
        args.result_path,
        '_'.join((args.att_kernel_type, 'N' + str(args.n_layers))))
    if not args.pos_enc:
        save_path += '_nopos'
    if args.num_local > 0:
        save_path = save_path + '_loc' + str(args.num_local) + args.local_type
    if args.dilated_mask:
        save_path += '_dilated'
    if args.groupwise_heads > 0:
        save_path = save_path + '_G' + str(args.groupwise_heads)
    if len(args.roi_poolsize) > 0:
        save_path = save_path + '_roi' + str(args.roi_poolsize)
    model_name = os.path.split(save_path)[1]
    # logger = Logger('./logs/{}'.format(model_name))
    logger = None

    model = BinaryClassifier(num_class, args.num_body_segments, args,
                             dropout=args.dropout)
    model = torch.nn.DataParallel(model, device_ids=None).cuda()
    cudnn.enabled = False
    pin_memory = True

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    train_videos = db.get_subset_videos('training')
    val_videos = db.get_subset_videos('validation')

    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root, args.feat_model, train_prop_file,
                      train_videos, exclude_empty=True,
                      body_seg=args.num_body_segments, input_dim=args.d_model,
                      prop_per_video=args.prop_per_video,
                      fg_ratio=6, bg_ratio=6, num_local=args.num_local,
                      use_flow=args.use_flow, only_flow=args.only_flow),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=pin_memory, drop_last=True)

    # validation runs one whole video at a time in test mode
    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root, args.feat_model, val_prop_file,
                      subset_videos=val_videos, exclude_empty=True,
                      body_seg=args.num_body_segments, input_dim=args.d_model,
                      test_mode=True,
                      use_flow=args.use_flow, verbose=False,
                      num_local=args.num_local, only_flow=args.only_flow),
        batch_size=1, shuffle=False, num_workers=10, pin_memory=True)

    ground_truth, cls_to_idx = grd_activity(
        'data/activity_net.v1-3.min_save.json', subset='validation')
    del cls_to_idx['background']

    # alternatives tried here: torch.optim.Adam and torch.optim.SGD with momentum
    optimizer = AdamW(model.parameters(), args.lr,
                      weight_decay=args.weight_decay)

    if args.resume is not None and len(args.resume) > 0:
        model.load_state_dict(torch.load(args.resume)['state_dict'],
                              strict=False)

    criterion_stage1 = CE_Criterion_multi(use_weight=True)
    criterion_stage2 = Rank_Criterion(epsilon=0.02)

    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # early stopping: give up after 5 evaluations without improvement
        if patience > 5:
            break

        # train for one epoch
        train(train_loader, model, optimizer, criterion_stage1,
              criterion_stage2, epoch, logger)

        # evaluate on validation list
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, ground_truth,
                            (epoch + 1) * len(train_loader), epoch)

            # remember the best validation loss and save a checkpoint; count
            # as an improvement only if the loss drops by more than 0.01%
            is_best = 1.0001 * loss < best_loss
            if is_best:
                patience = 0
            else:
                patience += 1
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                }, is_best, save_path)
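# save_checkpoint() above is defined elsewhere in the repo; a minimal sketch
# of the conventional pattern it is assumed to follow (requires
# `import shutil`): write a rolling checkpoint, and copy it aside whenever
# the validation loss improves.
def example_save_checkpoint(state, is_best, save_path):
    filename = save_path + '_checkpoint.pth.tar'
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, save_path + '_model_best.pth.tar')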