cropping, Stack(roll=(args.arch in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers)) #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) net = torch.nn.DataParallel(net.cuda()) net.eval() data_gen = enumerate(data_loader) total_num = len(data_loader.dataset) output = [] def eval_video(video_data): i, data, label = video_data num_crop = args.test_crops if args.modality == 'RGB': length = 3 elif args.modality == 'Flow':
Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std) ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers)) print(devices) net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] net.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) # ToDo: why # if len(devices) > 1: # cause bug
        ToTorchFormatTensor(
            div=(args.arch not in ['BNInception', 'InceptionV3'])),
        GroupNormalize(originalNet.input_mean, originalNet.input_std),
    ])),
    batch_size=1,
    shuffle=False,
    num_workers=args.workers * 2,
    pin_memory=False)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))

# net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
originalNet = torch.nn.DataParallel(originalNet.cuda())
originalNet.eval()
exit()  # NOTE: execution stops here; the code below is never reached

data_gen = enumerate(data_loader)
total_num = len(data_loader.dataset)
output = []

def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops
    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
args.modality, base_model=args.arch, consensus_type=args.consensus_type, img_feature_dim=args.img_feature_dim, print_spec=False) weights = args.weight checkpoint = torch.load(weights) #print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = { '.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items()) } net.load_state_dict(base_dict) net.cuda().eval() # Initialize frame transforms. transform = torchvision.transforms.Compose([ GroupOverSample(net.input_size, net.scale_size), Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])), GroupNormalize(net.input_mean, net.input_std), ]) # Obtain video frames if args.frame_folder is not None: print('Loading frames in %s' % args.frame_folder) import glob # here make sure after sorting the frame paths have the correct temporal order
new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl="im{}.jpg", test_mode=True, save_scores=True, transform=torchvision.transforms.Compose([ cropping, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), normalize, ])), batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True) net = net.cuda() net.eval() data_gen = enumerate(data_loader) total_num = len(data_loader.dataset) output = [] def eval_video(video_data): i, data, label = video_data num_crop = args.test_crops if args.modality == 'RGB': length = 3 elif args.modality == 'Flow': length = 10
ei = 0
while os.path.exists(logdir + '/%d/' % ei):
    ei = ei + 1

#################################
# main loop
#################################
for di in range(0, args.num_experiments):
    p['logdir'] = './%s/%s/%d/%d/' % (args.logdir, expdir, ei, di)
    if not os.path.exists(p['logdir']):
        os.makedirs(p['logdir'])

    model = TSN(p, dataset_train)
    model = model.cuda(device)
    optim = get_optimizer(args, model)

    max_perf_val = 0.0
    max_perf_aux = 0.0
    for epoch in range(0, args.num_epochs):
        stats_train = process_epoch('train', epoch, p, dataloader_train, model, optim)
        stats_val = process_epoch('val', epoch, p, dataloader_val, model)

        perf_val = stats_val['top1.cause'] + stats_val['top1.effect']
        perf_val_aux = stats_val['top2.cause'] + stats_val['top2.effect']

        if perf_val >= max_perf_val:
            if perf_val_aux >= max_perf_aux:
                max_perf_val = perf_val
                max_perf_aux = perf_val_aux
def get_pred(video_path, caption_path, opt): # options parser = argparse.ArgumentParser( description="TRN testing on the full validation set") # parser.add_argument('dataset', type=str, choices=['something','jester','moments','charades']) # parser.add_argument('modality', type=str, choices=['RGB', 'Flow', 'RGBDiff']) parser.add_argument('--dataset', type=str, default='somethingv2') parser.add_argument('--modality', type=str, default='RGB') parser.add_argument( '--weights', type=str, default= 'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar' ) parser.add_argument('--arch', type=str, default="BNInception") parser.add_argument('--save_scores', type=str, default=None) parser.add_argument('--test_segments', type=int, default=8) parser.add_argument('--max_num', type=int, default=-1) parser.add_argument('--test_crops', type=int, default=10) parser.add_argument('--input_size', type=int, default=224) parser.add_argument('--crop_fusion_type', type=str, default='TRNmultiscale', choices=['avg', 'TRN', 'TRNmultiscale']) parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') parser.add_argument('--gpus', nargs='+', type=int, default=None) parser.add_argument('--img_feature_dim', type=int, default=256) parser.add_argument( '--num_set_segments', type=int, default=1, help='TODO: select multiply set of n-frames from a video') parser.add_argument('--softmax', type=int, default=0) args = parser.parse_args() def accuracy(output, target, topk=(1, )): """Computes the precision@k for the specified values of k""" maxk = max(topk) batch_size = target.size(0) prob, pred = output.topk(maxk, 1, True, True) prob = prob.t().data.numpy().squeeze() pred = pred.t().data.numpy().squeeze() return prob, pred categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset( args.dataset, args.modality, opt) num_class = len(categories) net = TSN(num_class, args.test_segments if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1, args.modality, base_model=args.arch, consensus_type=args.crop_fusion_type, img_feature_dim=args.img_feature_dim, opt=opt) try: checkpoint = torch.load(args.weights) except: args.weights = os.path.join(opt.project_root, 'scripts/Eval/', args.weights) checkpoint = torch.load(args.weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = { '.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items()) } net.load_state_dict(base_dict) if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(net.scale_size), GroupCenterCrop(net.input_size), ]) elif args.test_crops == 10: cropping = torchvision.transforms.Compose( [GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) data_loader = torch.utils.data.DataLoader(TSNDataSet( video_path, caption_path, num_segments=args.test_segments, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl=prefix, test_mode=True, transform=torchvision.transforms.Compose([ cropping, Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: 
devices = list(range(args.workers)) #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) net = torch.nn.DataParallel(net.cuda()) net.eval() data_gen = enumerate(data_loader) output = [] def eval_video(video_data): i, data, label = video_data num_crop = args.test_crops if args.modality == 'RGB': length = 3 elif args.modality == 'Flow': length = 10 elif args.modality == 'RGBDiff': length = 18 else: raise ValueError("Unknown modality " + args.modality) input_var = torch.autograd.Variable(data.view(-1, length, data.size(2), data.size(3)), volatile=True) rst = net(input_var) if args.softmax == 1: # take the softmax to normalize the output to probability rst = F.softmax(rst) rst = rst.data.cpu().numpy().copy() if args.crop_fusion_type in ['TRN', 'TRNmultiscale']: rst = rst.reshape(-1, 1, num_class) else: rst = rst.reshape((num_crop, args.test_segments, num_class)).mean(axis=0).reshape( (args.test_segments, 1, num_class)) return i, rst, label[0] max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset) prob_all, pred_all = [], [] for i, (data, label) in data_gen: if i >= max_num: break rst = eval_video((i, data, label)) output.append(rst[1:]) prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)), label, topk=(1, 174)) prob_all.append(prob) pred_all.append(pred) return prob_all, pred_all
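# Sketch only: eval_video above relies on the pre-0.4 `volatile=True` Variable
# API, which newer PyTorch releases ignore. The same crop-averaged forward pass
# can be written with torch.no_grad(); `net`, `args` and `num_class` are assumed
# to be defined as in get_pred above, and `eval_video_no_grad` is a hypothetical name.
import torch
import torch.nn.functional as F

def eval_video_no_grad(video_data, length):
    i, data, label = video_data
    with torch.no_grad():
        inp = data.view(-1, length, data.size(2), data.size(3)).cuda()
        rst = net(inp)
        if args.softmax == 1:
            # normalise the raw scores to probabilities
            rst = F.softmax(rst, dim=1)
    rst = rst.cpu().numpy().copy()
    if args.crop_fusion_type in ['TRN', 'TRNmultiscale']:
        rst = rst.reshape(-1, 1, num_class)
    else:
        rst = (rst.reshape((args.test_crops, args.test_segments, num_class))
                  .mean(axis=0)
                  .reshape((args.test_segments, 1, num_class)))
    return i, rst, label[0]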
""" base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())} for key in ['consensus.fc_fusion_scales.6.3.bias', 'consensus.fc_fusion_scales.5.3.bias', 'consensus.fc_fusion_scales.4.3.bias', 'consensus.fc_fusion_scales.3.3.bias', 'consensus.fc_fusion_scales.2.3.bias', 'consensus.fc_fusion_scales.1.3.bias', 'consensus.fc_fusion_scales.0.3.bias', 'consensus.fc_fusion_scales.6.3.weight', 'consensus.fc_fusion_scales.5.3.weight', 'consensus.fc_fusion_scales.4.3.weight', 'consensus.fc_fusion_scales.3.3.weight', 'consensus.fc_fusion_scales.2.3.weight', 'consensus.fc_fusion_scales.1.3.weight', 'consensus.fc_fusion_scales.0.3.weight']: del base_dict[key] #print(base_dict) """ #net.load_state_dict(base_dict, strict=False) net.load_state_dict(checkpoint, strict=True) #print(net) #exit(0) net.eval() net.cuda() # Initialize frame transforms. transform = torchvision.transforms.Compose([ transforms.GroupOverSample(net.module.input_size, net.module.scale_size), transforms.Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), transforms.ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])), transforms.GroupNormalize(net.module.input_mean, net.module.input_std), ]) segments_gt = [0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 1,
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    '''
    consensus_type = avg
    base_model = resnet_101
    dropout : 0.5
    '''
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size    # 224
    scale_size = model.scale_size  # 256 / 224
    # input mean/std differ per modality
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()

    # Augmentations: GroupMultiScaleCrop and GroupRandomHorizontalFlip.
    # GroupMultiScaleCrop crops at fixed positions and scales and finally
    # resizes to 224, using bilinear interpolation.
    train_augmentation = model.get_augmentation()

    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # roll is needed because the model may have been trained on BGR rather than RGB input
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # see the optim policy
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # in general the lr here is 1e-3
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # evaluation-only run: validate once and return
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
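# Sketch only: save_checkpoint above writes the weights under the 'state_dict'
# key, which is what the resume branch reads back via checkpoint['state_dict'].
# A quick round-trip check; 'checkpoint.pth.tar' is a placeholder path and
# `model` is assumed to be the TSN instance built in main().
import torch

ckpt = torch.load('checkpoint.pth.tar', map_location='cpu')
assert {'epoch', 'arch', 'state_dict', 'best_prec1'} <= set(ckpt)
model.load_state_dict(ckpt['state_dict'])
print("resumed from epoch {}, best prec@1 {:.2f}".format(ckpt['epoch'], ckpt['best_prec1']))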
def main(): global args, best_prec1 args = parser.parse_args() check_rootfolders() categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset( args.dataset, args.modality) num_class = len(categories) args.store_name = '_'.join([ 'TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d' % args.num_segments ]) print('storing name: ' + args.store_name) model = TSN(339, args.num_segments, args.modality, base_model=args.arch, consensus_type=args.consensus_type, dropout=args.dropout, img_feature_dim=args.img_feature_dim, partial_bn=not args.no_partialbn) _, cnn = list(model.named_children())[0] for p in cnn.parameters(): p.requires_grad = False crop_size = model.crop_size scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std policies = model.get_optim_policies() train_augmentation = model.get_augmentation() model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # remove if not transfer learning checkpoint = torch.load('/home/ec2-user/mit_weights.pth.tar') model.load_state_dict(checkpoint['state_dict']) for module in list( list(model._modules['module'].children()) [-1].children())[-1].children(): module[-1] = nn.Linear(256, num_class) if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) cudnn.benchmark = True model.cuda() # Data loading code if args.modality != 'RGBDiff': normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 train_loader = torch.utils.data.DataLoader(TSNDataSet( args.root_path, args.train_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, transform=torchvision.transforms.Compose([ train_augmentation, Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(TSNDataSet( args.root_path, args.val_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) # define loss function (criterion) and optimizer if args.loss_type == 'nll': criterion = torch.nn.CrossEntropyLoss().cuda() else: raise ValueError("Unknown loss type") for group in policies: print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format( group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))) optimizer = torch.optim.Adam(model.parameters(), args.lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1) if args.evaluate: validate(val_loader, model, criterion, 0) return log_training = open( 
os.path.join(args.root_log, '%s.csv' % args.store_name), 'w') for epoch in range(args.start_epoch, args.epochs): scheduler.step() # train for one epoch train(train_loader, model, criterion, optimizer, epoch, log_training) # evaluate on validation set if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1: prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training, epoch) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, }, is_best) summary_writer.close()
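# Sketch only: the loop above steps the StepLR scheduler at the top of every
# epoch, which matches the pre-1.1 PyTorch convention. From PyTorch 1.1 onward
# the scheduler is expected to be stepped after the optimizer updates, e.g.:
for epoch in range(args.start_epoch, args.epochs):
    train(train_loader, model, criterion, optimizer, epoch, log_training)  # optimizer.step() happens inside train()
    scheduler.step()  # step the LR schedule once per epoch, after training
    if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
        prec1 = validate(val_loader, model, criterion,
                         (epoch + 1) * len(train_loader), log_training, epoch)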
def eval_one_model(num_class, modality, weights, devices, args): # init model net = TSN(num_class, 1, modality, base_model=args.arch, consensus_type=args.crop_fusion_type, dropout=args.dropout, mdl=args.mdl, pretrained=False) # load checkpoint checkpoint = torch.load(weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = checkpoint['state_dict'] # base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} net.load_state_dict(base_dict) # transformer if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(net.scale_size), GroupCenterCrop(net.input_size), ]) elif args.test_crops == 10: cropping = torchvision.transforms.Compose( [GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) # prepare dataset if args.dataset == 'ucf101': naming_pattern = "frame{:06d}.jpg" if modality in [ "RGB", "RGBDiff", 'tvl1' ] else args.flow_prefix + "{}_{:06d}.jpg" else: naming_pattern = "image_{:05d}.jpg" if modality in [ "RGB", "RGBDiff" ] else args.flow_prefix + "{}_{:05d}.jpg" data_loader = torch.utils.data.DataLoader(TSNDataSet( os.path.join(args.data_root_path, ('jpegs_256' if modality == 'RGB' else 'tvl1_flow')), args.test_list, num_segments=args.test_segments, new_length=4 if modality == "RGB" else 6, modality=modality, image_tmpl=naming_pattern, test_mode=True, dataset=args.dataset, transform=torchvision.transforms.Compose([ cropping, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) data_gen = iter(data_loader) total_num = len(data_loader.dataset) output = [] # [class probability, label code] # Inferencing net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) net.eval() max_num = len(data_loader.dataset) for i in tqdm(range(max_num)): data, label = next(data_gen) if i >= max_num: break output.append( eval_video(net, (i, data, label), num_class, modality, args)) video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output] video_labels = [x[2] for x in output] # summarize results cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf) cls_acc = cls_hit / cls_cnt print('Accuracy of {}, {:.02f}%'.format(modality, np.mean(cls_acc) * 100)) del net del data_loader class_acc_map = class_acc_mapping(cls_acc, args.dataset) return output, video_labels, class_acc_map
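# Illustration only: eval_one_model loads checkpoint['state_dict'] directly,
# while the commented-out line shows the variant used elsewhere in this code,
# which strips the 'module.' prefix that torch.nn.DataParallel prepends to
# every parameter name. Toy example with made-up keys:
state = {'module.base_model.conv1.weight': 0, 'module.new_fc.bias': 1}
base_dict = {'.'.join(k.split('.')[1:]): v for k, v in state.items()}
print(base_dict)  # {'base_model.conv1.weight': 0, 'new_fc.bias': 1}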
def main(): check_rootfolders() global best_prec1 if args.run_for == 'train': categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset( args.dataset, args.modality) elif args.run_for == 'test': categories, args.test_list, args.root_path, prefix = datasets_video.return_data( args.dataset, args.modality) num_class = len(categories) args.store_name = '_'.join([ 'STModeling', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d' % args.num_segments ]) print('storing name: ' + args.store_name) model = TSN(num_class, args) crop_size = model.crop_size scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std train_augmentation = model.get_augmentation() policies = model.get_optim_policies() model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] # best_prec1 = 0 model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) #print(model) cudnn.benchmark = True # Data loading code if ((args.modality != 'RGBDiff') | (args.modality != 'RGBFlow')): normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 elif args.modality == 'RGBFlow': data_length = args.num_motion if args.run_for == 'train': train_loader = torch.utils.data.DataLoader(TSNDataSet( "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data", args.train_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, dataset=args.dataset, transform=torchvision.transforms.Compose([ train_augmentation, Stack(roll=(args.arch in ['BNInception', 'InceptionV3']), isRGBFlow=(args.modality == 'RGBFlow')), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=False) val_loader = torch.utils.data.DataLoader(TSNDataSet( "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data", args.val_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, dataset=args.dataset, random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=(args.arch in ['BNInception', 'InceptionV3']), isRGBFlow=(args.modality == 'RGBFlow')), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False) # define loss function (criterion) and optimizer if args.loss_type == 'nll': criterion = torch.nn.CrossEntropyLoss().cuda() else: raise ValueError("Unknown loss type") for group in policies: print( ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format( group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))) optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) if args.consensus_type == 'DNDF': params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5) if 
args.evaluate: validate(val_loader, model, criterion, 0) return log_training = open( os.path.join(args.root_log, '%s.csv' % args.store_name), 'w') for epoch in range(args.start_epoch, args.epochs): if not args.consensus_type == 'DNDF': adjust_learning_rate(optimizer, epoch, args.lr_steps) # train for one epoch train(train_loader, model, criterion, optimizer, epoch, log_training) # evaluate on validation set if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1: prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, }, is_best) elif args.run_for == 'test': print("=> loading checkpoint '{}'".format(args.root_weights)) checkpoint = torch.load(args.root_weights) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) model.cuda().eval() print("=> loaded checkpoint ") test_loader = torch.utils.data.DataLoader(TSNDataSet( "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data", args.test_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, dataset=args.dataset, random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=(args.arch in ['BNInception', 'InceptionV3']), isRGBFlow=(args.modality == 'RGBFlow')), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False) # cam = cv2.VideoCapture(0) # cam.set(cv2.CAP_PROP_FPS, 48) # for i, (input, _) in enumerate(test_loader): # with torch.no_grad(): # input_var = torch.autograd.Variable(input) # # ret, frame = cam.read() # frame_map = np.full((280, 640, 3), 0, np.uint8) # frame_map = frame # print(frame_map) # while (True): # bg = np.full((480, 1200, 3), 15, np.uint8) # bg[:480, :640] = frame # # font = cv2.FONT_HERSHEY_SIMPLEX # # cv2.rectangle(bg, (128, 48), (640 - 128, 480 - 48), (0, 255, 0), 3) # # cv2.imshow('preview', bg) # # if cv2.waitKey(1) & 0xFF == ord('q'): # break test(test_loader, model, categories)
transform=torchvision.transforms.Compose([ cropping, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers)) net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) net.eval() data_gen = enumerate(data_loader) total_num = len(data_loader.dataset) output = [] def eval_video(video_data): i, data, label = video_data num_crop = args.test_crops if args.modality == 'RGB': length = 3 elif args.modality == 'Flow':
def main(): logger.auto_set_dir() global args, best_prec1 import argparse parser = argparse.ArgumentParser(description="PyTorch implementation of Temporal Segment Networks") parser.add_argument('--dataset', type=str,default="something", choices=['something', 'jester', 'moments']) parser.add_argument('--modality', type=str, default="RGB", choices=['RGB', 'Flow']) parser.add_argument('--train_list', type=str, default="") parser.add_argument('--val_list', type=str, default="") parser.add_argument('--root_path', type=str, default="") parser.add_argument('--store_name', type=str, default="") # ========================= Model Configs ========================== parser.add_argument('--arch', type=str, default="BNInception") parser.add_argument('--num_segments', type=int, default=3) parser.add_argument('--consensus_type', type=str, default='avg') parser.add_argument('--k', type=int, default=3) parser.add_argument('--dropout', '--do', default=0.8, type=float, metavar='DO', help='dropout ratio (default: 0.5)') parser.add_argument('--loss_type', type=str, default="nll", choices=['nll']) parser.add_argument('--img_feature_dim', default=256, type=int, help="the feature dimension for each frame") # ========================= Learning Configs ========================== parser.add_argument('--epochs', default=120, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='N', help='mini-batch size (default: 256)') parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate') parser.add_argument('--lr_steps', default=[50, 100], type=float, nargs="+", metavar='LRSteps', help='epochs to decay learning rate by 10') parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float, metavar='W', help='weight decay (default: 5e-4)') parser.add_argument('--clip-gradient', '--gd', default=20, type=float, metavar='W', help='gradient norm clipping (default: disabled)') parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true") # ========================= Monitor Configs ========================== parser.add_argument('--print-freq', '-p', default=20, type=int, metavar='N', help='print frequency (default: 10)') parser.add_argument('--eval-freq', '-ef', default=5, type=int, metavar='N', help='evaluation frequency (default: 5)') # ========================= Runtime Configs ========================== parser.add_argument('-j', '--workers', default=30, type=int, metavar='N', help='number of data loading workers (default: 4)') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') parser.add_argument('--snapshot_pref', type=str, default="") parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') parser.add_argument('--gpu', type=str, default='4') parser.add_argument('--flow_prefix', default="", type=str) parser.add_argument('--root_log', type=str, default='log') parser.add_argument('--root_model', type=str, default='model') parser.add_argument('--root_output', type=str, default='output') args = parser.parse_args() args.consensus_type = "TRN" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 
device_ids = [int(id) for id in args.gpu.split(',')] assert len(device_ids) >1, "TRN must run with GPU_num > 1" args.root_log = logger.get_logger_dir() args.root_model = logger.get_logger_dir() args.root_output = logger.get_logger_dir() categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality) num_class = len(categories) args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments]) print('storing name: ' + args.store_name) model = TSN(num_class, args.num_segments, args.modality, base_model=args.arch, consensus_type=args.consensus_type, dropout=args.dropout, img_feature_dim=args.img_feature_dim, partial_bn=not args.no_partialbn) crop_size = model.crop_size scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std policies = model.get_optim_policies() train_augmentation = model.get_augmentation() if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model)#TODO, , device_ids=[int(id) for id in args.gpu.split(',')] if torch.cuda.is_available(): model.cuda() if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})" .format(args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) cudnn.benchmark = True # Data loading code if args.modality != 'RGBDiff': normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 train_loader = torch.utils.data.DataLoader( TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, transform=torchvision.transforms.Compose([ train_augmentation, Stack(roll=(args.arch in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader( TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=(args.arch in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) # define loss function (criterion) and optimizer if args.loss_type == 'nll': criterion = torch.nn.CrossEntropyLoss().cuda() else: raise ValueError("Unknown loss type") for group in policies: logger.info('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format( group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])) optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) if args.evaluate: validate(val_loader, model, criterion, 0) return log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w') for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch, 
args.lr_steps) # train for one epoch train(train_loader, model, criterion, optimizer, epoch, log_training) # evaluate on validation set if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1: prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint({ 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, }, is_best)
def main(): global args, best_prec1 args = parser.parse_args() if not os.path.exists(args.record_path + args.modality.lower()): os.mkdir(args.record_path + args.modality.lower()) num_class = 2 model = TSN(num_class, args.num_segments, args.modality, base_model=args.arch, consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn) crop_size = model.crop_size scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std policies = model.get_optim_policies() train_augmentation = model.get_augmentation() # model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() model = model.cuda() if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) cudnn.benchmark = True # Data loading code if args.modality != 'RGBDiff': normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 train_set = TSNDataSet( args.root_path, args.train_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl="frame_{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg", transform=torchvision.transforms.Compose([ train_augmentation, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), normalize, ])) train_loader = torch.utils.data.DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_set = TSNDataSet( args.root_path, args.val_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl="frame_{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg", random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), normalize, ])) val_loader = torch.utils.data.DataLoader(val_set, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) # define loss function (criterion) and optimizer criterion = torch.nn.CrossEntropyLoss().cuda() for group in policies: print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format( group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))) optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) if args.evaluate: validate(val_loader, model, criterion, 0) return for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch, args.lr_steps) # train for one epoch train(train_loader, model, criterion, optimizer, epoch) # evaluate on validation set prec1, pred_dict = validate(val_loader, model, criterion, epoch) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) if is_best: with open( args.record_path + args.modality.lower() + '/' + args.snapshot_pref + args.modality.lower() + '_video_preds.pickle', 'wb') as f: pickle.dump(pred_dict, f) f.close() save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': 
model.state_dict(), 'best_prec1': best_prec1, }, is_best)
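# Sketch only: on a new best epoch the loop above pickles the per-video
# predictions next to the checkpoint. Reading them back mirrors the write,
# reusing the same path pieces from args.
import pickle

preds_path = (args.record_path + args.modality.lower() + '/' +
              args.snapshot_pref + args.modality.lower() + '_video_preds.pickle')
with open(preds_path, 'rb') as f:
    pred_dict = pickle.load(f)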