class ExtructFeatrue(object):
    """Feature extractor built from a pretrained TSN ('dpn107', RGB) checkpoint.

    The checkpoint's ``state_dict`` is loaded into a ``DataParallel``-wrapped
    TSN.  Its ``base_model`` and ``new_fc`` sub-modules are then re-wrapped
    separately (and moved to CUDA) so that the backbone feature and the
    softmax class scores can both be returned by :meth:`loadFeatrue`.
    """

    # Checkpoint used when the caller does not pass one; this is the path that
    # was previously hard-coded in ``__init__``.
    DEFAULT_CHECKPOINT = (
        '/mnt/workspace/model/activitynet_clip_kinetics600_dpn107_rgb_model/'
        'activitynet_clip_600_dpn107_rgb_model_best_074.pth.tar'
    )

    def __init__(self, checkpoint_path=None):
        """Build the network and load its weights.

        Args:
            checkpoint_path: path of a checkpoint containing a ``'state_dict'``
                entry saved from a ``DataParallel``-wrapped TSN.  ``None``
                (the default) selects :attr:`DEFAULT_CHECKPOINT`.
        """
        if checkpoint_path is None:
            checkpoint_path = self.DEFAULT_CHECKPOINT
        saved = torch.load(checkpoint_path)

        self.model = TSN(201, 3, 'RGB', 'dpn107', 1)
        # Pre-processing parameters are read before wrapping because
        # DataParallel does not forward these attributes.
        self.train_augmentation = self.model.get_augmentation()
        self.input_mean = self.model.input_mean
        self.input_std = self.model.input_std
        self.softmax = nn.Softmax(dim=-1).cuda()

        # The state dict is loaded into the DataParallel wrapper, i.e. its
        # keys are expected to carry the wrapper's 'module.' prefix.
        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(saved['state_dict'])

        # Expose backbone and classifier head as independently callable modules.
        self.base_model = nn.DataParallel(self.model.module.base_model).cuda()
        self.new_fc = nn.DataParallel(self.model.module.new_fc).cuda()

        self.model.eval()
        self.base_model.eval()
        self.new_fc.eval()

    def loadFeatrue(self, x):
        """Return ``(midfeature, classfeature)`` for input ``x``.

        ``midfeature`` is the output of the backbone; ``classfeature`` is the
        softmax (over the last dimension) of the classifier head applied to
        that feature.
        """
        midfeature = self.base_model(x)
        classfeature = self.softmax(self.new_fc(midfeature))
        return midfeature, classfeature
def load_net(RGBweights, Flowweights):
    """Create the RGB and Flow TSN networks and store them in module globals.

    Args:
        RGBweights: path of the RGB checkpoint file.
        Flowweights: path of the optical-flow checkpoint file.

    Both checkpoints must contain ``'state_dict'``, ``'epoch'`` and
    ``'best_prec1'`` entries.  The first dotted component of every state-dict
    key is dropped before loading.  The results are published as the globals
    ``RGBnet`` and ``Flownet``; the class count is read from the global
    ``num_class``.
    """
    global RGBnet
    global Flownet
    global num_class

    # ------------------------- RGB stream -------------------------
    print('Loading RGB Net......')
    RGBnet = TSN(num_class, 1, 'RGB',
                 base_model='BNInception', consensus_type='avg', dropout=0.7)
    rgb_ckpt = torch.load(RGBweights)
    rgb_state = {}
    for name, tensor in rgb_ckpt['state_dict'].items():
        # keep everything after the first '.'
        rgb_state[name.partition('.')[2]] = tensor
    RGBnet.load_state_dict(rgb_state)
    print("model epoch {} best prec@1: {}".format(rgb_ckpt['epoch'], rgb_ckpt['best_prec1']))

    # ------------------------- Flow stream ------------------------
    print('Loading Flow Net......')
    Flownet = TSN(num_class, 1, 'Flow',
                  base_model='BNInception', consensus_type='avg', dropout=0.7)
    flow_ckpt = torch.load(Flowweights)
    print("model epoch {} best prec@1: {}".format(flow_ckpt['epoch'], flow_ckpt['best_prec1']))
    flow_state = {}
    for name, tensor in flow_ckpt['state_dict'].items():
        flow_state[name.partition('.')[2]] = tensor
    Flownet.load_state_dict(flow_state)
def opf_model():
    """Return a two-class optical-flow TSN loaded from the fixed checkpoint file.

    Architecture, consensus type and dropout come from the global ``args``;
    the checkpoint's ``'state_dict'`` is loaded without any key rewriting.
    """
    flow_net = TSN(
        2, 1, 'Flow',
        base_model=args.arch,
        consensus_type=args.crop_fusion_type,
        dropout=args.dropout,
    )
    flow_ckpt = torch.load("475_inceptionv4__flow_model_best.pth.tar")
    flow_net.load_state_dict(flow_ckpt['state_dict'])
    return flow_net
def rgb_model():
    """Return a two-class RGB TSN loaded from ``args.rgb_weights``.

    Architecture, consensus type and dropout come from the global ``args``;
    the checkpoint's ``'state_dict'`` is loaded without any key rewriting.
    """
    rgb_net = TSN(
        2, 1, 'RGB',
        base_model=args.arch,
        consensus_type=args.crop_fusion_type,
        dropout=args.dropout,
    )
    rgb_ckpt = torch.load(args.rgb_weights)
    rgb_net.load_state_dict(rgb_ckpt['state_dict'])
    return rgb_net
def get_executor(use_gpu=True):
    """Build the shift-enabled mobilenetv2 TSN and hand it to ``torch2executor``.

    The weights are read from the checkpoint at the global ``pt_path``.  The
    first dotted component of each state-dict key is dropped and the
    ``base_model.classifier.*`` entries are renamed to ``new_fc.*`` before
    loading.

    Args:
        use_gpu: when true the target is ``'cuda'``; otherwise an LLVM target
            string for an ARM cortex-a72 is used.

    Returns:
        Whatever ``torch2executor`` returns for the module, a random
        ``(1, 24, 224, 224)`` example input and the chosen target.
    """
    torch_module = TSN(
        2, 1, 'RGB',
        base_model='mobilenetv2',
        consensus_type='avg',
        img_feature_dim=256,
        pretrain='imagenet',
        is_shift=True, shift_div=8, shift_place='blockres',
        # non-local blocks are enabled purely from the checkpoint path name
        non_local='_nl' in pt_path,
    )

    state = torch.load(pt_path)['state_dict']
    weights = {name.partition('.')[2]: tensor for name, tensor in state.items()}

    # Move the classifier weights to the names the TSN module uses.
    renames = (
        ('base_model.classifier.weight', 'new_fc.weight'),
        ('base_model.classifier.bias', 'new_fc.bias'),
    )
    for old_key, new_key in renames:
        if old_key in weights:
            weights[new_key] = weights.pop(old_key)
    torch_module.load_state_dict(weights)

    # A single tensor (not a tuple) is passed as the example input.
    example_input = torch.rand(1, 24, 224, 224)
    target = 'cuda' if use_gpu else 'llvm -mcpu=cortex-a72 -target=armv7l-linux-gnueabihf'
    return torch2executor(torch_module, example_input, target)
def main():
    """Train or evaluate a TSN model, plotting and persisting per-epoch metrics.

    Parses the command line into the module-global ``args``, builds the TSN
    network for the selected dataset, optionally resumes from ``args.resume``
    and then either runs a single validation pass (``args.evaluate``) or trains
    from ``args.start_epoch`` up to ``args.epochs``.

    After every epoch the metric curves are redrawn, a snapshot image is
    written, and the metric histories are saved as ``.npy`` files under
    ``args.snapshot_pref`` so that a resumed run can reload them.

    Raises:
        ValueError: for an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    dataset_classes = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400, 'myDataset': 12}
    if args.dataset not in dataset_classes:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = dataset_classes[args.dataset]

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read model-derived settings before the DataParallel wrapper hides them.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the old code printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # ------------------------------ data ------------------------------
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown modality {}'.format(args.modality))

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # ----------------------- loss and optimizer -----------------------
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # ------------------------ metric histories ------------------------
    fig, axs = plt.subplots(4, 1, figsize=(10, 5))
    metric_names = ('train_acc', 'train_loss', 'val_acc', 'val_loss',
                    'epochs', 'val_epochs')
    if args.start_epoch == 0:
        history = {name: [] for name in metric_names}
    else:
        # Resumed run: continue the curves saved by the previous run.
        history = {
            name: np.load("./%s/%s.npy" % (args.snapshot_pref, name)).tolist()
            for name in metric_names
        }

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch)
        history['train_acc'].append(acc)
        history['train_loss'].append(loss)
        history['epochs'].append(epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, v_loss = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            history['val_acc'].append(prec1)
            history['val_loss'].append(v_loss)
            history['val_epochs'].append(epoch)

        axs[0].plot(history['val_epochs'], history['val_loss'], c='b', marker='.', label='val_loss')
        axs[1].plot(history['val_epochs'], history['val_acc'], c='r', marker='.', label='val_acc')
        axs[2].plot(history['epochs'], history['train_loss'], c='b', marker='.', label='train_loss')
        axs[3].plot(history['epochs'], history['train_acc'], c='r', marker='.', label='train_acc')
        plt.title('TSN_' + args.snapshot_pref)
        # Draw the legends once, on the first epoch actually run, so that
        # resumed runs (start_epoch > 0) get them too.
        if epoch == args.start_epoch:
            for ax in axs:
                ax.legend(loc='best')
        plt.pause(0.000001)

        if not os.path.exists(args.snapshot_pref):
            os.makedirs(args.snapshot_pref)
        plt.savefig('./%s/%s.jpg' % (args.snapshot_pref, str(epoch).zfill(5)))
        for name in metric_names:
            np.save("./%s/%s.npy" % (args.snapshot_pref, name), history[name])
num_class = 51 elif args.dataset == 'kinetics': num_class = 400 else: raise ValueError('Unknown dataset '+args.dataset) net = TSN(num_class, 1, args.modality, base_model=args.arch, consensus_type=args.crop_fusion_type, dropout=args.dropout) checkpoint = torch.load(args.weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} net.load_state_dict(base_dict) if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(net.scale_size), GroupCenterCrop(net.input_size), ]) elif args.test_crops == 10: cropping = torchvision.transforms.Compose([ GroupOverSample(net.input_size, net.scale_size) ]) else: raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops)) data_loader = torch.utils.data.DataLoader( TSNDataSet("", args.test_list, num_segments=args.test_segments,
def main():
    """Train or evaluate a TSN model on frames stored under ``UCF-Frames``.

    Parses the command line into the module-global ``args``, builds the TSN
    network for the selected dataset, optionally resumes from ``args.resume``
    and then either runs a single validation pass (``args.evaluate``) or trains
    from ``args.start_epoch`` up to ``args.epochs``, validating every
    ``args.eval_freq`` epochs (and after the last one) and saving a checkpoint
    after each validation.

    Raises:
        ValueError: for an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    dataset_classes = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400}
    if args.dataset not in dataset_classes:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = dataset_classes[args.dataset]

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read model-derived settings before the DataParallel wrapper hides them.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the old code printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # ------------------------------ data ------------------------------
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown modality {}'.format(args.modality))

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "{:06d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("UCF-Frames", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("UCF-Frames", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # ----------------------- loss and optimizer -----------------------
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality) num_class = len(categories) net = TSN(num_class, args.test_segments if args.crop_fusion_type in ['TRN','TRNmultiscale'] else 1, args.modality, base_model=args.arch, consensus_type=args.crop_fusion_type, img_feature_dim=args.img_feature_dim, ) checkpoint = torch.load(args.weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} net.load_state_dict(base_dict) if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(net.scale_size), GroupCenterCrop(net.input_size), ]) elif args.test_crops == 10: cropping = torchvision.transforms.Compose([ GroupOverSample(net.input_size, net.scale_size) ]) else: raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops)) data_loader = torch.utils.data.DataLoader( TSNDataSet(args.root_path, args.val_list, num_segments=args.test_segments,
def main():
    """Train or evaluate a TSN model using the Adam optimizer.

    Parses the command line into the module-global ``args``, builds the TSN
    network for the selected dataset, optionally resumes from ``args.resume``
    and then either runs a single validation pass (``args.evaluate``) or trains
    from ``args.start_epoch`` up to ``args.epochs``, validating every
    ``args.eval_freq`` epochs (and after the last one) and saving a checkpoint
    after each validation.

    Raises:
        ValueError: for an unknown dataset, modality or loss type.
    """
    torch.set_printoptions(precision=6)
    global args, best_prec1
    args = parser.parse_args()

    # Choose the number of classes from the dataset argument.
    dataset_classes = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400, 'cad': 8}
    if args.dataset not in dataset_classes:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = dataset_classes[args.dataset]

    # Build the model (translated from the original author's notes):
    #   * num_class          - number of output classes;
    #   * args.num_segments  - how many segments a video is split into
    #                          (K in the TSN paper);
    #   * args.modality      - input type, e.g. RGB for plain frames or Flow
    #                          for optical flow;
    #   * args.arch          - backbone, e.g. resnet101 or BNInception;
    #   * args.consensus_type - how per-snippet outputs are fused, e.g. avg;
    #   * args.dropout       - dropout parameter.
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # These attributes/methods are defined on the TSN class; read them before
    # the DataParallel wrapper hides them.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Multi-GPU training.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # args.resume selects whether to continue from a saved checkpoint: it is
    # either empty or the path of a previously saved model file.  torch.load
    # reads the file and load_state_dict initialises the network from it.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the old code printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown modality {}'.format(args.modality))

    # Second part of main: data loading (translated from the original notes).
    # TSNDataSet reads the raw data and is wrapped in a DataLoader, which
    # batches the samples and labels it yields.
    # NOTE(review): the original notes state that TSNDataSet subclasses
    # torch.utils.data.Dataset and returns a (tensor, int label) pair - that
    # class is not visible here, confirm against its definition.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=3, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=3, pin_memory=True)

    # Third part of main: training (translated from the original notes).
    # Define the loss and optimizer, then train, validating and saving the
    # model at the configured epochs.  adjust_learning_rate applies the
    # learning-rate schedule; args.lr_steps lists the epochs at which the rate
    # changes.  args.eval_freq controls how often validation and saving run.
    # NOTE(review): the original notes say the rate is multiplied by 0.1 at
    # each step - adjust_learning_rate is not visible here, confirm.

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    # The SGD optimizer used originally is kept for reference:
    # optimizer = torch.optim.SGD(policies,
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    # try Adam instead.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
# Pre-processing parameters are taken from the first model, before the
# DataParallel wrappers hide these attributes.
crop_size = first_model.crop_size
scale_size = first_model.scale_size
input_mean = first_model.input_mean
input_std = first_model.input_std

first_model = torch.nn.DataParallel(first_model, device_ids=args.gpus).cuda()
second_model = torch.nn.DataParallel(second_model, device_ids=args.gpus).cuda()

# Load the first model's weights.  A missing checkpoint is now a hard error:
# the ValueError used to be constructed but never raised, so execution silently
# continued with an uninitialised model.
if not os.path.isfile(args.first_model_path):
    raise ValueError(
        'No check point found at "{}"'.format(args.first_model_path))
print("=> loading checkpoint '{}'".format(args.first_model_path))
checkpoint = torch.load(args.first_model_path)
args.start_epoch = checkpoint['epoch']
best_prec1 = checkpoint['best_prec1']
first_model.load_state_dict(checkpoint['state_dict'])
print("=> loaded checkpoint epoch {}".format(checkpoint['epoch']))

# Load the second model's weights.  Note that args.start_epoch and best_prec1
# are overwritten here, so the second checkpoint's values are the ones kept.
if not os.path.isfile(args.second_model_path):
    raise ValueError(
        'No check point found at "{}"'.format(args.second_model_path))
print("=> loading checkpoint '{}'".format(args.second_model_path))
checkpoint = torch.load(args.second_model_path)
args.start_epoch = checkpoint['epoch']
best_prec1 = checkpoint['best_prec1']
second_model.load_state_dict(checkpoint['state_dict'])
print("=> loaded checkpoint epoch {}".format(checkpoint['epoch']))
from tqdm import tqdm for checkpoint_name in tqdm(checkpoint_names): checkpoint = torch.load(checkpoint_name) print(checkpoint_name) """ base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())} for key in ['consensus.fc_fusion_scales.6.3.bias', 'consensus.fc_fusion_scales.5.3.bias', 'consensus.fc_fusion_scales.4.3.bias', 'consensus.fc_fusion_scales.3.3.bias', 'consensus.fc_fusion_scales.2.3.bias', 'consensus.fc_fusion_scales.1.3.bias', 'consensus.fc_fusion_scales.0.3.bias', 'consensus.fc_fusion_scales.6.3.weight', 'consensus.fc_fusion_scales.5.3.weight', 'consensus.fc_fusion_scales.4.3.weight', 'consensus.fc_fusion_scales.3.3.weight', 'consensus.fc_fusion_scales.2.3.weight', 'consensus.fc_fusion_scales.1.3.weight', 'consensus.fc_fusion_scales.0.3.weight']: del base_dict[key] #print(base_dict) """ #net.load_state_dict(base_dict, strict=False) net.load_state_dict(checkpoint, strict=True) #print(net) #exit(0) net.eval() net.cuda() # Initialize frame transforms. transform = torchvision.transforms.Compose([ transforms.GroupOverSample(net.module.input_size, net.module.scale_size), transforms.Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), transforms.ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])), transforms.GroupNormalize(net.module.input_mean, net.module.input_std), ]) segments_gt = [0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
def main():
    """Fine-tune a two-output TSN/TRN model from a pretrained checkpoint.

    Parses the command line into the module-global ``args``, resolves the
    dataset lists via ``datasets_video.return_dataset``, builds a TSN with two
    outputs and initialises it (non-strictly) from the fixed pretrained
    checkpoint after dropping the final layer of every
    ``consensus.fc_fusion_scales`` head.  It then optionally resumes from
    ``args.resume`` and trains from ``args.start_epoch`` up to ``args.epochs``
    with a weighted ``BCEWithLogitsLoss``, writing the model's state dict to
    disk at the start of every epoch.

    Raises:
        ValueError: for an unknown modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    # The category list is returned but not used: the model below is
    # hard-wired to two outputs.
    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)

    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch,
                                args.consensus_type, 'segment%d' % args.num_segments])
    print('storing name: ' + args.store_name)

    model = TSN(2, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Initialise from the pretrained weights.  The first dotted component of
    # each key is dropped, and the '.3' layer of each of the seven fusion
    # scales is removed so it is not loaded (strict=False tolerates the gap).
    checkpoint = torch.load(
        'pretrain/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar',
        map_location='cpu')
    base_dict = {name.partition('.')[2]: tensor
                 for name, tensor in checkpoint['state_dict'].items()}
    for suffix in ('bias', 'weight'):
        for scale in range(6, -1, -1):
            del base_dict['consensus.fc_fusion_scales.%d.3.%s' % (scale, suffix)]
    model.load_state_dict(base_dict, strict=False)

    # Read model-derived settings before the DataParallel wrapper hides them.
    # crop_size / scale_size are only needed by the disabled validation loader.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the old code printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown modality {}'.format(args.modality))

    inception_style = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=inception_style),
                       ToTorchFormatTensor(div=not inception_style),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Validation is currently disabled; the loader is kept for reference.
    # val_loader = torch.utils.data.DataLoader(
    #     TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
    #                new_length=data_length,
    #                modality=args.modality,
    #                image_tmpl=prefix,
    #                random_shift=False,
    #                transform=torchvision.transforms.Compose([
    #                    GroupScale(int(scale_size)),
    #                    GroupCenterCrop(crop_size),
    #                    Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
    #                    ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
    #                    normalize,
    #                ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # Despite the 'nll' name this variant trains with a weighted BCE loss:
        # output 0 is weighted 1.2, output 1 is weighted 1.0.
        weight = torch.ones([2]).cuda()
        weight[0] = 1.2
        pos_weight = torch.ones([2]).cuda()
        # pos_weight[0] = 2
        criterion = torch.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight).cuda()
        # criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    # The base learning rate is fixed here rather than taken from args.lr.
    optimizer = torch.optim.SGD(policies,
                                0.0001,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # if args.evaluate:
    #     validate(val_loader, model, criterion, 0)
    #     return

    # The log file is now closed when training ends or fails (it used to be
    # left open for the life of the process).
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # NOTE(review): the weights are saved *before* the epoch is
            # trained, so the result of the final epoch is never written -
            # confirm this is intended.
            torch.save(model.state_dict(), 'checkpoint_bce_20_w12_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), 'checkpoint_bce_20_w12_{}.pth'.format(epoch))

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch, log_training)
def main():
    """Train or evaluate a single-device TSN model with live visualisation.

    Parses the command line into the module-global ``args``, builds the TSN
    network for the selected dataset and moves it to CUDA (no ``DataParallel``
    wrapper), optionally resumes from ``args.resume`` and then either runs a
    single validation pass (``args.evaluate``) or trains from
    ``args.start_epoch`` up to ``args.epochs``.  Training and validation
    receive a ``vis.Visualizer`` instance; a checkpoint is saved after each
    validation.

    Raises:
        ValueError: for an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    dataset_classes = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400}
    if args.dataset not in dataset_classes:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = dataset_classes[args.dataset]

    # Typical settings noted by the original author:
    # consensus_type = avg, base_model = resnet_101, dropout = 0.5
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size      # original author's note: 224
    scale_size = model.scale_size    # original author's note: 256/224
    # differs per modality
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    # Original author's note (translated): the augmentation consists of
    # GroupMultiScaleCrop - crops at fixed positions resized to 224 with
    # bilinear interpolation - and GroupRandomHorizontalFlip.
    # NOTE(review): get_augmentation is not visible here; confirm.
    train_augmentation = model.get_augmentation()

    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the old code printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown modality {}'.format(args.modality))

    # Original author's note (translated): `roll` exists because of the
    # BGR-vs-RGB channel order the backbone was trained with.
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # show the optimisation policy of every parameter group
    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    # original author's note: the learning rate is generally 1e-3 here
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run one validation pass and stop.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    # Stored under 'state_dict' (was 'test_crops') so that the
                    # resume branch above can read the weights back.
                    'state_dict': model.state_dict(),
                    # Store the running best (was the latest prec1), which is
                    # what the resume branch restores into best_prec1.
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train a TRN/TSN model and log progress to ``<root_log>/<store_name>.csv``.

    Parses the command line into the module-level ``args``, resolves the
    dataset lists through ``datasets_video.return_dataset``, builds the model
    and loaders, optionally resumes from ``args.resume`` and then alternates
    training and validation, saving a checkpoint after every validation pass.
    With ``args.evaluate`` set only a single validation pass is run.

    Raises:
        ValueError: if the modality or loss type is not supported.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    (categories, args.train_list, args.val_list, args.test_list,
     args.root_path, prefix) = datasets_video.return_dataset(
         args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # Four types of input modalities for two-stream ConvNets (one stream
    # spatial and the other temporal): a single RGB image, stacked RGB
    # difference, stacked optical flow field, and stacked warped optical flow
    # field; the spatial stream ConvNet operates on a single RGB image, and
    # the temporal stream ConvNet takes a stack of consecutive optical flow
    # fields as input.
    # A single RGB image usually encodes static appearance at a specific time
    # point and lacks the contextual information about previous and next
    # frames. RGB difference between two consecutive frames describes the
    # appearance change, which may correspond to the motion salient region.
    # Optical flow fields may not concentrate on the human action; the warped
    # optical flow suppresses the background motion and makes motion
    # concentrate on the actor.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # Batch-Normalization-Inception and InceptionV3 (evolution of InceptionV2
    # of GoogleNet) get rolled channels and un-divided pixel values.
    inception_input = args.arch in ['BNInception', 'InceptionV3']

    # Division between train and val set
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=inception_input),
                       ToTorchFormatTensor(div=not inception_input),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=inception_input),
                       ToTorchFormatTensor(div=not inception_input),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    # The context manager closes (and flushes) the log even when training
    # is interrupted by an exception.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
def eval_one_model(num_class, modality, weights, devices, args):
    """Evaluate one single-modality TSN checkpoint on ``args.test_list``.

    Args:
        num_class: number of classes of the classifier.
        modality: modality string; selects the frame naming pattern, the
            frame sub-directory and the snippet length.
        weights: path of the checkpoint file to load.
        devices: device ids; the model is moved to ``devices[0]`` and wrapped
            in ``DataParallel`` over all of them.
        args: parsed options (arch, crop_fusion_type, dropout, mdl,
            test_crops, dataset, flow_prefix, data_root_path, test_list,
            test_segments, workers).

    Returns:
        A tuple ``(output, video_labels, class_acc_map)``: the list of
        ``eval_video`` results (one per video, scores at index 1 and label at
        index 2), the list of labels, and the result of
        ``class_acc_mapping`` on the per-class accuracies.

    Raises:
        ValueError: if ``args.test_crops`` is neither 1 nor 10.
    """
    # init model
    net = TSN(num_class,
              1,
              modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout,
              mdl=args.mdl,
              pretrained=False)

    # load checkpoint; the state-dict keys are used exactly as stored
    checkpoint = torch.load(weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))
    net.load_state_dict(checkpoint['state_dict'])

    # transformer
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    # prepare dataset
    if args.dataset == 'ucf101':
        if modality in ["RGB", "RGBDiff", 'tvl1']:
            naming_pattern = "frame{:06d}.jpg"
        else:
            naming_pattern = args.flow_prefix + "{}_{:06d}.jpg"
    else:
        if modality in ["RGB", "RGBDiff"]:
            naming_pattern = "image_{:05d}.jpg"
        else:
            naming_pattern = args.flow_prefix + "{}_{:05d}.jpg"

    frame_dir = 'jpegs_256' if modality == 'RGB' else 'tvl1_flow'
    is_bninception = args.arch == 'BNInception'
    data_loader = torch.utils.data.DataLoader(
        TSNDataSet(os.path.join(args.data_root_path, frame_dir),
                   args.test_list,
                   num_segments=args.test_segments,
                   new_length=4 if modality == "RGB" else 6,
                   modality=modality,
                   image_tmpl=naming_pattern,
                   test_mode=True,
                   dataset=args.dataset,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       GroupNormalize(net.input_mean, net.input_std),
                   ])),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers * 2,
        pin_memory=True)

    # Inferencing
    net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net.eval()

    # With batch_size=1 the loader yields exactly one item per video, so
    # iterating it directly visits every video once.
    output = []  # one eval_video result per video
    for i, (data, label) in enumerate(tqdm(data_loader)):
        output.append(
            eval_video(net, (i, data, label), num_class, modality, args))

    video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output]
    video_labels = [x[2] for x in output]

    # summarize results
    cf = confusion_matrix(video_labels, video_pred).astype(float)
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    cls_acc = cls_hit / cls_cnt
    print('Accuracy of {}, {:.02f}%'.format(modality,
                                            np.mean(cls_acc) * 100))

    # Drop the references to the model and loader before returning.
    del net
    del data_loader

    class_acc_map = class_acc_mapping(cls_acc, args.dataset)
    return output, video_labels, class_acc_map
class TSN_BIT(nn.Module):
    """TSN network followed by two fully connected layers (101 -> 32 -> 8).

    The TSN hyper-parameters (``num_class``, ``num_segments``, ``modality``,
    ``arch`` and ``crop_fusion_type``) are read from module scope when the
    instance is created, and the TSN weights are loaded from a fixed
    checkpoint path on construction.
    """

    def __init__(self):
        super(TSN_BIT, self).__init__()
        self.tsn = TSN(num_class,
                       num_segments=num_segments,
                       modality=modality,
                       base_model=arch,
                       consensus_type=crop_fusion_type,
                       dropout=0.7)
        self.activation = nn.LeakyReLU()
        # NOTE(review): the input width 101 is hard-coded; it presumably has
        # to equal the module-level num_class passed to TSN above -- confirm.
        self.fc1 = nn.Linear(101, 32)
        self.fc2 = nn.Linear(32, 8)
        self.model_name = '2019-01-20_23-57-32.pth'
        self._load_tsn_rgb_weight()
        # self._load_pretrained_model(self.model_name)

    def _load_pretrained_model(self, model_name):
        """Load a checkpoint that holds weights for all layers of this module.

        Only entries whose key exists in this module's state dict are taken
        from the checkpoint; every parameter the checkpoint does not provide
        is reported and keeps its current value.
        """
        checkpoint = torch.load('/home/zhufl/videoPrediction/BIT_train_test/' +
                                model_name)
        print("Number of parameters recovered from modeo {} is {}".format(
            model_name, len(checkpoint)))

        model_state = self.state_dict()
        base_dict = {k: v for k, v in checkpoint.items() if k in model_state}

        for key in model_state:
            if key not in base_dict:
                print("Missing motion branch param {}".format(key))

        model_state.update(base_dict)
        self.load_state_dict(model_state)

    def _load_tsn_rgb_weight(self):
        """Load the UCF101 RGB TSN weights into ``self.tsn``.

        Entries are selected by their position in the checkpoint: the first
        18 keep their key, entries 19..414 have the first seven characters of
        the key removed, and later entries are skipped. The classifier
        weights are then taken from ``base_model.fc-action.1``.
        """
        # NOTE(review): selecting by position depends on the iteration order
        # of the checkpoint dict; the 7 dropped characters are presumably a
        # 'module.' prefix -- confirm against the checkpoint file.
        rgb_weights = '/home/zhufl/Workspace/tsn-pytorch/ucf101_rgb.pth'
        checkpoint = torch.load(rgb_weights)

        base_dict = {}
        for count, (k, v) in enumerate(checkpoint.items(), 1):
            # format() prints the same text under Python 2 and Python 3.
            print("{} {}".format(count, k))
            if 415 > count > 18:
                base_dict.setdefault(k[7:], v)
            if count < 19:
                base_dict.setdefault(k, v)

        base_dict.setdefault('new_fc.weight',
                             checkpoint['base_model.fc-action.1.weight'])
        base_dict.setdefault('new_fc.bias',
                             checkpoint['base_model.fc-action.1.bias'])
        self.tsn.load_state_dict(base_dict)

    def forward(self, input):
        """Run TSN, then fc1 and fc2, with LeakyReLU after TSN and fc1."""
        x = self.activation(self.tsn(input))
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x
def main():
    """Run windowed prediction with a pretrained model on ``args.val_list``.

    Prints the environment versions and the parsed configuration, builds the
    model, loads the checkpoint given by ``args.resume`` (returning early
    when none is given), builds the validation dataset and calls
    ``predict`` on it, then reports the elapsed time.

    Raises:
        ValueError: if ``args.dataset`` is not supported.
    """
    global args
    args = parser.parse_args()

    separator = "------------------------------------"
    print(separator)
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    print(separator)
    print(args.arch+" Configurations:")
    for option, value in args.__dict__.items():
        print("- {}: {}".format(option, value))
    print(separator)

    # dataset name -> (number of classes, frame file name format)
    dataset_table = {
        'ucf101': (101, "{:06d}.jpg"),  # Format for THUMOS14 videos
        'hmdb51': (51, "{:05d}.jpg"),
        'kinetics': (400, "{:04d}.jpg"),
        'something': (174, "{:04d}.jpg"),
    }
    if args.dataset not in dataset_table:
        raise ValueError('Unknown dataset '+args.dataset)
    num_class, rgb_read_format = dataset_table[args.dataset]

    model = TSN(num_class, args.num_segments, args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size, scale_size = model.crop_size, model.scale_size
    input_mean, input_std = model.input_mean, model.input_std

    # On CUDA the model is wrapped before it is printed, on CPU afterwards.
    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model)

    print("pretrained_parts: ", args.pretrained_parts)

    if not args.resume:
        print("Please specify the checkpoint to pretrained model")
        return

    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        if _CUDA:
            checkpoint = torch.load(args.resume)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')

        # Checkpoints without a stored learning rate ask the user for one.
        if "lr" in checkpoint.keys():
            args.lr = checkpoint['lr']
        else:
            args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
            args.lr = float(args.lr)

        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
               .format(args.resume, checkpoint['epoch'], args.lr)))
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality == 'RGBDiff':
        normalize = IdentityTransform()
    else:
        normalize = GroupNormalize(input_mean, input_std)

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    end = time.time()

    if args.modality in ["RGB", "RGBDiff"]:
        frame_tmpl = args.rgb_prefix+rgb_read_format
    else:
        frame_tmpl = args.flow_prefix+rgb_read_format

    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=True),
        ToTorchFormatTensor(div=False),
        normalize,
    ])
    dataset = TSNDataSet("", args.val_list,
                         num_segments=args.num_segments,
                         new_length=data_length,
                         modality=args.modality,
                         image_tmpl=frame_tmpl,
                         random_shift=False,
                         transform=val_transform,
                         test_mode=True,
                         window_size=_WINDOW_SIZE,
                         window_stride=_WINDOW_STRIDE)

    # The loader is constructed here, but predict() below is given the
    # dataset itself.
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              collate_fn=collate_fn)

    predict(dataset, model, criterion=None, iter=0)

    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
def main():
    """Train a TSN model on per-frame data, logging through ``log.l``.

    Parses the options into the module-level ``args``, builds the model and
    the ``PerFrameData`` loaders, optionally resumes from ``args.resume``,
    runs one validation pass first when ``args.evaluate`` is set, and then
    alternates training and validation, saving a checkpoint after every
    validation pass.

    Raises:
        ValueError: if the dataset, modality or loss type is not supported.
    """
    global args, best_prec1
    args = Parse_args()

    log.l.info('Input command:\n ===========> python ' + ' '.join(sys.argv) +
               ' ===========>')

    class_counts = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'mm': 500,
        'thumos14': 21,
    }
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = class_counts[args.dataset]

    log.l.info(
        '============= prepare the model and model\'s parameters ============='
    )

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        log.l.info(
            '============== train from checkpoint (finetune mode) ================='
        )
        if os.path.isfile(args.resume):
            log.l.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            log.l.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            log.l.info(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    log.l.info('============== Now, loading data ... ==============\n')
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # Both loaders read frames with the same file name template.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        PerFrameData(args.frames_root,
                     args.train_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     data_gap=args.data_gap,
                     test_mode=False,
                     image_tmpl=image_tmpl,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=is_bninception),
                         ToTorchFormatTensor(div=not is_bninception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.data_workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        PerFrameData(args.frames_root,
                     args.val_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     data_gap=args.data_gap,
                     test_mode=True,
                     image_tmpl=image_tmpl,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=is_bninception),
                         ToTorchFormatTensor(div=not is_bninception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.data_workers,
        pin_memory=True)

    log.l.info(
        '================= Now, define loss function and optimizer =============='
    )
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        log.l.info(
            ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Unlike an evaluate-only switch, this validates once and then still
    # goes on to train.
    if args.evaluate:
        log.l.info('Need val the data first...')
        validate(val_loader, model, criterion, 0)

    log.l.info(
        '\n\n===================> TRAIN and VAL begins <===================\n')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Run a 4-class ECO model over ``args.val_list`` and print predictions.

    Loads the state dict stored at ``args.model_path``; every model
    parameter that the checkpoint does not contain is initialised here
    before loading. Then, for each validation batch, the top-1 prediction
    is printed next to the target.

    Reads the module-level ``args`` (``num_segments``, ``pretrained_parts``,
    ``model_path``, ``val_list``); it does not parse the command line itself.
    """
    global args, best_prec1

    num_class = 4
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers also support specifying per-parameter options. To do this,
    # pass in an iterable of dicts. Each of them defines a separate parameter
    # group and should contain a "params" key holding the list of parameters
    # belonging to it. Other keys should match the keyword arguments accepted
    # by the optimizer and are used as optimization options for this group.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model_dict = model.state_dict()
    print("pretrained_parts: ", args.pretrained_parts)

    new_state_dict = torch.load(args.model_path)['state_dict']

    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    # Parameters missing from the checkpoint start as zeros; weights are then
    # set to 1 (names containing 'bn') or Xavier-initialised, biases to 0.
    # Keys that contain neither 'weight' nor 'bias' stay zero.
    for k in un_init_dict_keys:
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality='RGB',
                   image_tmpl=rgb_read_format,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       normalize,
                   ])),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()
    # Inference only: disabling autograd avoids building a graph for every
    # forward pass.
    with torch.no_grad():
        for frames, target in val_loader:
            target = target.cuda()
            output = model(frames)
            _, pred = output.topk(1, 1, True, True)
            print(pred, target)

    print('done')
def main():
    """Train a multi-label TSN model with ``BCEWithLogitsLoss``.

    Parses the command line into the module-level ``args``, builds the model
    and the ``TSNDataSetMovie`` loaders, optionally resumes the weights from
    ``args.resume`` and trains. After every validated epoch the checkpoint is
    saved, the validation output matrix is written with ``np.save`` and one
    line is appended to ``args.record_path``.

    Raises:
        ValueError: if the dataset or modality is not supported.
    """
    global args, best_prec1
    args = parser.parse_args()

    class_counts = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400, 'movie': 21}
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = class_counts[args.dataset]

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # best_prec1 is not restored: checkpoints written by this script
            # hold 'epoch', 'state_dict' and 'optimizer' only.
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # Both loaders read frames with the same file name template.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "frame_{:04d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSetMovie("",
                        args.train_list,
                        num_segments=args.num_segments,
                        new_length=data_length,
                        modality=args.modality,
                        image_tmpl=image_tmpl,
                        transform=torchvision.transforms.Compose([
                            train_augmentation,
                            Stack(roll=is_bninception),
                            ToTorchFormatTensor(div=not is_bninception),
                            normalize,
                        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSetMovie("",
                        args.val_list,
                        num_segments=args.num_segments,
                        new_length=data_length,
                        modality=args.modality,
                        image_tmpl=image_tmpl,
                        random_shift=False,
                        transform=torchvision.transforms.Compose([
                            GroupScale(int(scale_size)),
                            GroupCenterCrop(crop_size),
                            Stack(roll=is_bninception),
                            ToTorchFormatTensor(div=not is_bninception),
                            normalize,
                        ])),
        # Half the training batch size, but never 0 (a batch size of 1 would
        # otherwise be rejected by DataLoader).
        batch_size=max(1, int(args.batch_size / 2)),
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.BCEWithLogitsLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    zero_time = time.time()
    best_map = 0
    print('Start training...')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        start_time = time.time()
        trainloss = train(train_loader, model, criterion, optimizer, epoch)
        print('Traing loss %4f Epoch %d' % (trainloss, epoch))

        # Everything below needs validation results, so it only runs on
        # epochs that are actually validated.
        if not ((epoch + 1) % args.eval_freq == 0
                or epoch == args.epochs - 1):
            continue

        # evaluate on validation set
        valloss, mAP, wAP, output_mtx = validate(val_loader, model, criterion)
        end_time = time.time()
        epoch_time = end_time - start_time
        total_time = end_time - zero_time
        print('Total time used: %s Epoch %d time uesd: %s' %
              (str(datetime.timedelta(seconds=int(total_time))), epoch,
               str(datetime.timedelta(seconds=int(epoch_time)))))
        print(
            'Train loss: {0:.4f} val loss: {1:.4f} mAP: {2:.4f} wAP: {3:.4f}'
            .format(trainloss, valloss, mAP, wAP))

        is_best = mAP > best_map
        if is_best:
            best_map = mAP

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best, epoch)

        npy_name = str(epoch) + args.result_path
        np.save(npy_name, output_mtx)

        with open(args.record_path, 'a') as file:
            file.write(
                'Epoch:[{0}]'
                'Train loss: {1:.4f} val loss: {2:.4f} map: {3:.4f}\n'.
                format(epoch + 1, trainloss, valloss, mAP))

    print('************ Done!... ************')
def main():
    """Load a trained TSN checkpoint and run ``test`` on ``args.test_list``.

    Options come from the parser returned by ``options()``. The model is
    built with dropout 0.5 and ``partial_bn`` disabled, wrapped in
    ``DataParallel`` and then given the ``state_dict`` stored in
    ``args.checkpoint``.

    Raises:
        ValueError: if ``args.dataset`` is not supported.
    """
    parser = options()
    args = parser.parse_args()

    known_datasets = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'saag01': 2,
    }
    if args.dataset not in known_datasets:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = known_datasets[args.dataset]

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_size = model.input_size
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Built here, but the loader below composes its own scale/crop steps.
    cropping = torchvision.transforms.Compose([
        GroupScale(scale_size),
        GroupCenterCrop(input_size),
    ])

    checkpoint = torch.load(args.checkpoint)
    start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    state_dict = checkpoint['state_dict']

    # The weights are loaded after wrapping, so the stored keys are used
    # as-is by the DataParallel module.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    if args.modality in ["RGB", "RGBDiff"]:
        frame_tmpl = args.img_prefix + "_{:05d}" + args.ext
    else:
        frame_tmpl = args.flow_prefix + "_{}_{:05d}" + args.ext

    use_bninception = args.arch == 'BNInception'
    test_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=use_bninception),
        ToTorchFormatTensor(div=not use_bninception),
        GroupNormalize(input_mean, input_std),
    ])
    test_set = TSNDataSet("",
                          args.test_list,
                          num_segments=args.num_segments,
                          new_length=data_length,
                          modality=args.modality,
                          image_tmpl=frame_tmpl,
                          random_shift=False,
                          transform=test_transform,
                          custom_prefix=args.custom_prefix)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
def main(conf, test_set, test_part=-1):
    """Score one gulped test set with a trained TSN model and save the scores.

    Args:
        conf: configuration object (gulp_test_dir, modality, classes_map,
            arch, crop_fusion_type, dropout, weights, test_crops, class_type,
            test_segments, workers, gpus, checkpoint, lr). ``num_classes`` is
            written onto it.
        test_set: name of the test split directory; also used in the output
            file name.
        test_part: forwarded to ``EpicTSNTestDataset`` as ``part``; when
            greater than 0 it is appended to the output file name.

    Raises:
        ValueError: if ``conf.test_crops`` is neither 1 nor 10.
    """
    gulp_path = os.path.join(conf.gulp_test_dir, conf.modality.lower(),
                             'test', test_set)
    gulp_path = Path(os.path.realpath(gulp_path))

    # NOTE: pickle can execute arbitrary code while loading; only point
    # conf.classes_map at a trusted file.
    with open(conf.classes_map, "rb") as classes_file:
        classes_map = pickle.load(classes_file)
    conf.num_classes = count_num_classes(classes_map)

    net = TSN(conf.num_classes,
              1,
              conf.modality,
              base_model=conf.arch,
              consensus_type=conf.crop_fusion_type,
              dropout=conf.dropout)

    checkpoint = torch.load(conf.weights)
    print("Model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    # Drop the first dotted component of every key before loading into the
    # bare (not yet wrapped) network.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in checkpoint['state_dict'].items()
    }
    net.load_state_dict(base_dict)

    if conf.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif conf.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                conf.test_crops))

    class_type = 'verb+noun' if conf.class_type == 'action' else conf.class_type
    if conf.modality == 'Flow':
        dataset = EpicVideoFlowDataset(gulp_path=gulp_path,
                                       class_type=class_type)
    else:
        dataset = EpicVideoDataset(gulp_path=gulp_path, class_type=class_type)

    is_bninception = conf.arch == 'BNInception'
    data_loader = torch.utils.data.DataLoader(
        EpicTSNTestDataset(dataset,
                           classes_map,
                           num_segments=conf.test_segments,
                           new_length=1 if conf.modality == "RGB" else 5,
                           modality=conf.modality,
                           transform=torchvision.transforms.Compose([
                               cropping,
                               Stack(roll=is_bninception),
                               ToTorchFormatTensor(div=not is_bninception),
                               GroupNormalize(net.input_mean, net.input_std),
                           ]),
                           part=test_part),
        batch_size=1,
        shuffle=False,
        num_workers=conf.workers * 2,
        pin_memory=True)

    net = torch.nn.DataParallel(net, device_ids=conf.gpus).cuda()
    net.eval()

    total_num = len(data_loader.dataset)
    output = []

    proc_start_time = time.time()
    for i, (keys, input_) in enumerate(data_loader):
        rst = eval_video(conf, (i, keys, input_), net)
        # Keep everything but the first element of the result.
        output.append(rst[1:])
        cnt_time = time.time() - proc_start_time
        print('video {} done, total {}/{}, average {} sec/video'.format(
            i, i + 1, total_num,
            float(cnt_time) / (i + 1)))

    video_index = [x[0] for x in output]
    scores = [x[1] for x in output]

    save_scores = './{}/tsn_{}_{}_testset_{}_{}_lr_{}_model_{:03d}.npz'.format(
        conf.checkpoint, conf.class_type, conf.modality.lower(), test_set,
        conf.arch, conf.lr, checkpoint['epoch'])
    if test_part > 0:
        save_scores = save_scores.replace('.npz',
                                          '_part-{}.npz'.format(test_part))
    np.savez(save_scores, segment_indices=video_index, scores=scores)
def get_pred(video_path, caption_path, opt):
    """Run a pretrained TRN/TSN model and return ranked class predictions.

    Args:
        video_path: passed to ``TSNDataSet`` as its first argument.
        caption_path: passed to ``TSNDataSet`` as its second argument.
        opt: project options; forwarded to ``datasets_video.return_dataset``
            and ``TSN``, and ``opt.project_root`` is used to locate the
            weights when the default relative path cannot be opened.

    Returns:
        A tuple ``(prob_all, pred_all)`` of lists with one numpy array per
        evaluated video: the top-k scores and the matching class indices,
        with k = min(174, number of classes).

    Raises:
        ValueError: for unsupported ``--test_crops`` or ``--modality`` values.
    """
    # options
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    parser.add_argument('--dataset', type=str, default='somethingv2')
    parser.add_argument('--modality', type=str, default='RGB')
    parser.add_argument(
        '--weights',
        type=str,
        default=
        'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar'
    )
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', type=str, default=None)
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=10)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type',
                        type=str,
                        default='TRNmultiscale',
                        choices=['avg', 'TRN', 'TRNmultiscale'])
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--img_feature_dim', type=int, default=256)
    parser.add_argument(
        '--num_set_segments',
        type=int,
        default=1,
        help='TODO: select multiply set of n-frames from a video')
    parser.add_argument('--softmax', type=int, default=0)

    # NOTE(review): this parses the command line of the calling process, so
    # options unknown to this parser make it exit -- confirm that is intended.
    args = parser.parse_args()

    def accuracy(output, target, topk=(1, )):
        """Return the top-``max(topk)`` scores and class indices of ``output``."""
        maxk = max(topk)
        prob, pred = output.topk(maxk, 1, True, True)
        prob = prob.t().data.numpy().squeeze()
        pred = pred.t().data.numpy().squeeze()
        return prob, pred

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, opt)
    num_class = len(categories)

    uses_trn = args.crop_fusion_type in ['TRN', 'TRNmultiscale']
    net = TSN(num_class,
              args.test_segments if uses_trn else 1,
              args.modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              img_feature_dim=args.img_feature_dim,
              opt=opt)

    try:
        checkpoint = torch.load(args.weights)
    except (IOError, OSError):
        # The path could not be opened as given; retry relative to the
        # project's evaluation scripts directory.
        args.weights = os.path.join(opt.project_root, 'scripts/Eval/',
                                    args.weights)
        checkpoint = torch.load(args.weights)

    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    # Drop the first dotted component of every key before loading into the
    # bare (not yet wrapped) network.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in checkpoint['state_dict'].items()
    }
    net.load_state_dict(base_dict)

    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    inception_input = args.arch in ['BNInception', 'InceptionV3']
    data_loader = torch.utils.data.DataLoader(
        TSNDataSet(video_path,
                   caption_path,
                   num_segments=args.test_segments,
                   new_length=1 if args.modality == "RGB" else 5,
                   modality=args.modality,
                   image_tmpl=prefix,
                   test_mode=True,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=inception_input),
                       ToTorchFormatTensor(div=not inception_input),
                       GroupNormalize(net.input_mean, net.input_std),
                   ])),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers * 2,
        pin_memory=True)

    # The model runs on the default CUDA devices; no explicit device list is
    # built because it was never passed to DataParallel.
    net = torch.nn.DataParallel(net.cuda())
    net.eval()

    def eval_video(video_data):
        """Forward one video; return ``(index, scores, first label)``."""
        i, data, label = video_data
        num_crop = args.test_crops

        if args.modality == 'RGB':
            length = 3
        elif args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18
        else:
            raise ValueError("Unknown modality " + args.modality)

        input_var = torch.autograd.Variable(data.view(-1, length, data.size(2),
                                                      data.size(3)),
                                            volatile=True)
        rst = net(input_var)
        if args.softmax == 1:
            # take the softmax to normalize the output to probability
            rst = F.softmax(rst)

        rst = rst.data.cpu().numpy().copy()

        if uses_trn:
            rst = rst.reshape(-1, 1, num_class)
        else:
            rst = rst.reshape(
                (num_crop, args.test_segments,
                 num_class)).mean(axis=0).reshape(
                     (args.test_segments, 1, num_class))

        return i, rst, label[0]

    max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset)
    # Rank at most 174 classes, capped at the number of classes so that
    # datasets with fewer classes do not make topk() fail.
    top_k = (1, min(174, num_class))

    prob_all, pred_all = [], []
    for i, (data, label) in enumerate(data_loader):
        if i >= max_num:
            break
        rst = eval_video((i, data, label))
        prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)),
                              label,
                              topk=top_k)
        prob_all.append(prob)
        pred_all.append(pred)
    return prob_all, pred_all
def main():
    """Run a single visualization pass of a TSN model over the validation list.

    Steps, in order:
      1. Parse the command line (registering ``--class_index`` first).
      2. Build the global ``class_to_name`` mapping from the class index file.
      3. Dump the parsed options to ``opts.json`` under ``args.result_path``.
      4. Build the model and optionally restore it from ``args.resume``.
      5. Call ``validate`` once with a path to ``visualize.log``.

    Raises:
        ValueError: if ``args.dataset`` or ``args.modality`` is not supported.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    class_counts = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'something': 174,
    }
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = class_counts[args.dataset]

    # The class index file is read the same way for every dataset; only the
    # per-line parsing and the frame-name prefix differ.
    with open(args.class_index, 'r') as f:
        content = f.readlines()
    if args.dataset == 'something':
        img_prefix = ''
        # One class name per line; the line position is the class id.
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        # Lines look like "<1-based id> <name>"; ids are shifted to 0-based.
        class_to_name = {}
        for line in content:
            fields = line.strip().split(' ')
            class_to_name[int(fields[0]) - 1] = fields[1]

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    # The LSTM output type is only meaningful for the LSTM consensus variants.
    if args.consensus_type not in ('lstm', 'conv_lstm'):
        args.lstm_out_type = None

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    # Read the preprocessing attributes before the model is wrapped in
    # DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path that was actually loaded (the
            # original printed the unrelated ``args.evaluate`` flag here).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # Nothing was normalized, so there is nothing to undo. Previously
        # ``rev_normalize`` was left unbound here and the ``validate`` call
        # below raised NameError.
        # NOTE(review): assumes validate() only calls rev_normalize as a
        # transform - confirm against validate().
        rev_normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # Optional temporal perturbation applied to the validation frames.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    print('visualizing...')
    # validate() receives the log file path, not an open handle.
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
def main():
    """Train (or only evaluate) a TSN/TRN model with the Adam optimizer.

    Parses the command line, resolves the dataset lists through
    ``datasets_video.return_dataset``, builds the model and the train and
    validation loaders, optionally restores a checkpoint from
    ``args.resume``, and then either runs one ``validate`` pass
    (``args.evaluate``) or the full training loop. During training a
    checkpoint is saved after every evaluation, and the log is written to
    ``<args.root_log>/<store_name>_adam.csv``.

    Raises:
        ValueError: if ``args.modality`` or ``args.loss_type`` is not
            supported.
    """
    global args, best_prec1
    args = parser.parse_args()
    print("args args args")
    print(args)

    check_rootfolders()

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read the preprocessing attributes before the model is wrapped in
    # DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path that was actually loaded (the
            # original printed the unrelated ``args.evaluate`` flag here).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # Stack/ToTorchFormatTensor settings depend only on the backbone.
    inception_family = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult']))

    optimizer = torch.optim.Adam(policies,
                                 lr=args.lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_path = os.path.join(args.root_log, '%s_adam.csv' % args.store_name)
    # The context manager guarantees the log is flushed and closed even when
    # training aborts; the original never closed this handle.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
for op in emmanuelleNet._op_list: #print("ID:", op[0].ljust(36),# "Op:", op[1].ljust(12), "Out:", op[2].ljust(36), "In:", op[3]) print(op[2].ljust(36), "<", op[1].ljust(12), "<", op[3]) print( "-----------------------------------------------------------------------------------------------------------------" ) checkpoint = torch.load(args.weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = { '.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items()) } originalNet.load_state_dict(base_dict) emmanuelleDict = { '.'.join(k.split('.')[2:]): v for k, v in list(checkpoint['state_dict'].items())[:-6] } # print("Emmanuelle dict", len(emmanuelleDict)) # for k, v in emmanuelleDict.items(): # print(k.ljust(50), ":", v.shape) emmanuelleNet.load_state_dict(emmanuelleDict) if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(originalNet.scale_size), GroupCenterCrop(originalNet.input_size),
def main():
    """Train (or only evaluate) a TSN model and save validation scores.

    Prints the environment versions and the parsed configuration, builds the
    model and data loaders, optionally restores a checkpoint from
    ``args.resume``, and then either:

    * runs one ``validate`` pass (``args.evaluate``) and saves its score
      tensor under the file name ``score.pt``, or
    * runs the training loop, validating every ``args.eval_freq`` epochs,
      saving a checkpoint after each validation, and saving all collected
      score tensors at the end.

    Raises:
        ValueError: if ``args.dataset``, ``args.arch``, ``args.modality`` or
            ``args.loss_type`` is not supported.
    """
    global args, best_prec1
    args = parser.parse_args()

    separator = "------------------------------------"
    print(separator)
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    print(separator)
    print(args.arch + " Configurations:")
    for key, value in vars(args).items():
        print("- {}: {}".format(key, value))
    print(separator)
    print(args.mode)

    # dataset name -> (number of classes, RGB frame file-name template)
    dataset_specs = {
        'ucf101': (101, "{:05d}.jpg"),
        'hmdb51': (51, "{:05d}.jpg"),
        'kinetics': (400, "{:05d}.jpg"),
        'something': (174, "{:05d}.jpg"),
        'somethingv2': (174, "img_{:05d}.jpg"),
        'NTU_RGBD': (120, "{:05d}.jpg"),
        'tinykinetics': (150, "{:05d}.jpg"),
    }
    if args.dataset not in dataset_specs:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class, rgb_read_format = dataset_specs[args.dataset]

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                non_local=args.non_local)

    # Read the preprocessing attributes before the model is wrapped in
    # DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers accept an iterable of dicts, each describing one parameter
    # group (a "params" list plus per-group optimizer options). ``policies``
    # is passed to the optimizer in that form below.
    policies = model.get_optim_policies(args.dataset)
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Tensor layout flags for Stack / ToTorchFormatTensor depend on the
    # backbone. The original also built filtered copies of the state dict
    # here that were never used; that dead code is gone.
    if args.arch in ("resnet50", "resnet34"):
        div = False
        roll = True
    elif args.arch[:3] == "TCM":
        div = True
        roll = False
    else:
        # Previously ``div``/``roll`` stayed unbound for any other arch and
        # the loader construction below crashed with NameError.
        raise ValueError('Unsupported arch ' + args.arch)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Every supported modality reads one frame per segment here.
    if args.modality in ['RGB', 'Flow', 'RGBDiff']:
        data_length = 1
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # NOTE(review): the flow template also uses ``rgb_read_format``; kept as
    # in the original - confirm this is intended.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = args.rgb_prefix + rgb_read_format
    else:
        image_tmpl = args.flow_prefix + rgb_read_format

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=image_tmpl,
            img_start_idx=args.img_start_idx,
            transform=torchvision.transforms.Compose([
                GroupScale((240, 320)),
                train_augmentation,
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=image_tmpl,
            img_start_idx=args.img_start_idx,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale((240, 320)),
                GroupCenterCrop(crop_size),
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    output_list = []
    if args.evaluate:
        prec1, score_tensor = validate(val_loader,
                                       model,
                                       criterion,
                                       temperature=100)
        output_list.append(score_tensor)
        eval_score_file = 'score.pt'
        save_validation_score(output_list, filename=eval_score_file)
        # Report the file name that was actually passed to the saver (the
        # original message named 'score_inf5.pt').
        print("validation score saved in {}".format('/'.join(
            (args.val_output_folder, eval_score_file))))
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch; train() returns the temperature that the
        # following validation pass is run with
        temperature = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, score_tensor = validate(val_loader,
                                           model,
                                           criterion,
                                           temperature=temperature)
            output_list.append(score_tensor)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            print(output_best)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    # save validation score
    save_validation_score(output_list)
    print("validation score saved in {}".format('/'.join(
        (args.val_output_folder, 'score.pt'))))
scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std train_augmentation = model.get_augmentation() print("crop", crop_size, "scale", scale_size) policies = model.get_optim_policies() model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) print(model) cudnn.benchmark = True # Data loading code if (args.modality != 'RGBDiff') | (args.modality != 'RGBFlow'): normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB':
def main():
    """Train (or only evaluate) a TSN model on the module-level ``device``.

    Parses the command line, builds the model and data loaders, optionally
    restores a checkpoint from ``args.resume``, and then either runs one
    ``validate`` pass (``args.evaluate``) or the training loop with SGD and a
    ``MultiStepLR`` schedule. A checkpoint is saved after every evaluation.
    The module-level ``writer`` is closed before returning.

    Raises:
        ValueError: if ``args.dataset`` is not supported.
    """
    global args, best_prec1
    args = parser.parse_args()

    class_counts = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400}
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = class_counts[args.dataset]

    # RGB reads a single frame per segment; every other modality reads 5
    # (the original comment notes: 5 displacement maps from 6 RGB images).
    data_length = 1 if args.modality == 'RGB' else 5

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                new_length=data_length)
    model = model.to(device)

    # Read the preprocessing attributes before the model is (possibly)
    # wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    if device.type == 'cuda':
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # map_location lets a checkpoint saved from GPU tensors be
            # restored when ``device`` is not CUDA.
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # Report the checkpoint path that was actually loaded (the
            # original printed the unrelated ``args.evaluate`` flag here).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality in ["RGB", "RGBDiff", "CV"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.lr_steps,
                                                     gamma=0.1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        # Close the writer on this early exit too; the original only closed
        # it after the training loop.
        writer.close()
        return

    for epoch in range(0, args.epochs):
        # Stepping from epoch 0 fast-forwards the schedule when resuming.
        # NOTE(review): on PyTorch >= 1.1 scheduler.step() is expected after
        # optimizer.step(); confirm the targeted PyTorch version before
        # reordering.
        scheduler.step()
        if epoch < args.start_epoch:
            continue

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    writer.close()
# Choose the CUDA device ids for DataParallel: one entry per worker, taken
# from --gpus when given, otherwise the ids 0..workers-1.
if args.gpus is None:
    devices = list(range(args.workers))
else:
    devices = [args.gpus[idx] for idx in range(args.workers)]
print(devices)

# The network is placed on the first selected device, then replicated.
net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)

# Optionally restore weights and bookkeeping from a checkpoint file.
if args.resume:
    if not os.path.isfile(args.resume):
        print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        print(("=> loading checkpoint '{}'".format(args.resume)))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        net.load_state_dict(checkpoint['state_dict'])
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch'])))

# TODO: explicitly moving the wrapped net to cuda:<devices[0]> when more than
# one device is selected was noted to cause a bug; the reason is not yet
# understood, so it stays disabled.

# Inference only from here on.
net.eval()

data_gen = enumerate(data_loader)
total_num = len(data_loader.dataset)
output = []
def main():
    """Train (or only evaluate) a TSN model with SGD.

    Parses the command line, builds the model and data loaders, optionally
    restores a checkpoint from ``args.resume``, and then either runs one
    ``validate`` pass (``args.evaluate``) or the training loop. During
    training the model is validated every ``args.eval_freq`` epochs and a
    checkpoint is saved after each validation.

    Raises:
        ValueError: if ``args.dataset``, ``args.modality`` or
            ``args.loss_type`` is not supported.
    """
    global args, best_prec1
    args = parser.parse_args()

    class_counts = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400}
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset '+args.dataset)
    num_class = class_counts[args.dataset]

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read the preprocessing attributes before the model is wrapped in
    # DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path that was actually loaded (the
            # original printed the unrelated ``args.evaluate`` flag here).
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix+"{}_{:05d}.jpg"

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)