class ExtructFeatrue(object):
    """Load a pretrained TSN checkpoint and expose mid-level backbone features
    plus softmaxed class probabilities for input clips.

    NOTE(review): the class name looks like a typo for "ExtractFeature"; kept
    as-is because external callers may reference it.
    """

    def __init__(self):
        # Hard-coded checkpoint path — TODO(review): consider making this configurable.
        saved = torch.load(
            '/mnt/workspace/model/activitynet_clip_kinetics600_dpn107_rgb_model/activitynet_clip_600_dpn107_rgb_model_best_074.pth.tar'
        )
        # 201 classes, 3 segments, RGB modality, dpn107 backbone, new_length 1.
        # NOTE(review): positional argument meanings inferred from sibling call
        # sites in this file — confirm against the TSN constructor.
        self.model = TSN(201, 3, 'RGB', 'dpn107', 1)
        self.train_augmentation = self.model.get_augmentation()
        self.input_mean = self.model.input_mean
        self.input_std = self.model.input_std
        self.softmax = nn.Softmax(dim=-1).cuda()
        # Wrap in DataParallel first so the checkpoint's 'module.'-prefixed
        # state-dict keys line up when loading.
        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(saved['state_dict'])
        # Split the network so features and class scores can be computed separately.
        self.base_model = nn.DataParallel(self.model.module.base_model).cuda()
        self.new_fc = nn.DataParallel(self.model.module.new_fc).cuda()
        self.model.eval()
        self.base_model.eval()
        self.new_fc.eval()

    def loadFeatrue(self, x):
        """Return (backbone features, softmax class probabilities) for batch ``x``."""
        midfeature = self.base_model(x)
        classfeature = self.softmax(self.new_fc(midfeature))
        return midfeature, classfeature
def train(args):
    """Train a TSN model on UCF-101 in Paddle dygraph mode.

    Reads the config from ``args.config``, optionally resumes from
    ``args.save_dir/tsn_model`` when ``args.pretrain`` is set, and trains
    for ``args.epoch`` epochs with SGD momentum.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        # Build the training model from the custom network definition.
        train_model = TSN(args.batch_size, 32)
        train_model.train()
        opt = fluid.optimizer.Momentum(0.001, 0.9,
                                       parameter_list=train_model.parameters())
        if args.pretrain:
            # Load the previously saved model and continue training from it.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)
        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        # Reader yields one batch per step; each sample is (imgs, label).
        train_reader = UCFReader('train', train_config).create_reader()
        epochs = args.epoch
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype(
                    'float32'
                )  # imgs, shape (batch_size, seg_num*seglen, 3, target_size, target_size)
                y_data = np.array([x[1] for x in data]).astype(
                    'int64')  # labels, shape (batch_size,)
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label = fluid.layers.reshape(label, [args.batch_size, 1])
                # Keep gradients from flowing into the label tensor.
                label.stop_gradient = True
                out, acc = train_model(img, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
            # Checkpoint at the end of each epoch.
            # NOTE(review): nesting reconstructed from flattened source —
            # confirm the save is per-epoch rather than per-batch.
            fluid.dygraph.save_dygraph(train_model.state_dict(),
                                       args.save_dir + '/tsn_model')
        print("Final loss: {}".format(avg_loss.numpy()))
def rgb_model():
    """Construct the 2-class RGB TSN and load weights from ``args.rgb_weights``."""
    model = TSN(2, 1, 'RGB',
                base_model=args.arch,
                consensus_type=args.crop_fusion_type,
                dropout=args.dropout)
    state = torch.load(args.rgb_weights)
    model.load_state_dict(state['state_dict'])
    return model
def opf_model(weights='475_inceptionv4__flow_model_best.pth.tar'):
    """Construct the 2-class optical-flow TSN and load a checkpoint.

    Args:
        weights: path to the ``.pth.tar`` checkpoint to load. Defaults to the
            previously hard-coded file so existing callers are unaffected;
            parameterized for consistency with ``rgb_model`` (which reads its
            path from ``args.rgb_weights``).

    Returns:
        The TSN model with the checkpoint's ``state_dict`` loaded.
    """
    net = TSN(2, 1, 'Flow',
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout)
    checkpoint = torch.load(weights)
    net.load_state_dict(checkpoint['state_dict'])
    return net
def get_executor(use_gpu=True):
    """Build the TSM/MobileNetV2 TSN, load its checkpoint from the global
    ``pt_path``, and compile it through ``torch2executor``.

    Args:
        use_gpu: compile for CUDA when True, otherwise target an ARM
            Cortex-A72 llvm triple.
    """
    torch_module = TSN(2, 1, 'RGB',
                       base_model='mobilenetv2',
                       consensus_type='avg',
                       img_feature_dim=256,
                       pretrain='imagenet',
                       is_shift=True, shift_div=8, shift_place='blockres',
                       # enable non-local blocks only when the checkpoint name says so
                       non_local='_nl' in pt_path,
                       )
    checkpoint = torch.load(
        pt_path)
    checkpoint = checkpoint['state_dict']
    # Strip the leading 'module.' (DataParallel) prefix from every key.
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint.items())}
    # Remap the backbone classifier weights onto the TSN's new_fc head.
    replace_dict = {'base_model.classifier.weight': 'new_fc.weight',
                    'base_model.classifier.bias': 'new_fc.bias',
                    }
    for k, v in replace_dict.items():
        if k in base_dict:
            base_dict[v] = base_dict.pop(k)
    torch_module.load_state_dict(base_dict)
    # NOTE(review): the parentheses below do NOT create a tuple (no trailing
    # comma) — torch_inputs is a bare tensor. Confirm torch2executor accepts
    # a single tensor here.
    torch_inputs = (torch.rand(1, 24, 224, 224))
    if use_gpu:
        target = 'cuda'
    else:
        target = 'llvm -mcpu=cortex-a72 -target=armv7l-linux-gnueabihf'
    return torch2executor(torch_module, torch_inputs, target)
def __init__(self):
    """Build the TSN_BIT head: a TSN backbone plus a 51→32→21 classifier,
    then load pretrained RGB weights."""
    super(TSN_BIT, self).__init__()
    # num_class / num_segments / modality / arch / crop_fusion_type are
    # module-level globals defined outside this view.
    self.tsn = TSN(num_class,
                   num_segments=num_segments,
                   modality=modality,
                   base_model=arch,
                   consensus_type=crop_fusion_type,
                   dropout=0.7)
    self.activation = nn.LeakyReLU()
    # NOTE(review): assumes the backbone emits 51 features — confirm against
    # num_class.
    self.fc1 = nn.Linear(51, 32)
    self.fc2 = nn.Linear(32, 21)
    # Checkpoint filename consumed by _load_tsn_rgb_weight().
    self.model_name = '2019-01-20_23-57-32.pth'
    self._load_tsn_rgb_weight()
def __init__(self):
    """Build the TSN_BIT head for the Flow modality: a TSN backbone plus a
    101→32→8 classifier, then load a pretrained model."""
    super(TSN_BIT, self).__init__()
    # num_class / data_length / modality / arch / crop_fusion_type are
    # module-level globals defined outside this view; note num_segments is
    # driven by data_length here, unlike the RGB variant.
    self.tsn = TSN(num_class,
                   num_segments=data_length,
                   modality=modality,
                   base_model=arch,
                   consensus_type=crop_fusion_type,
                   dropout=0.7)
    self.activation = nn.LeakyReLU()
    # NOTE(review): assumes the backbone emits 101 features — confirm.
    self.fc1 = nn.Linear(101, 32)
    self.fc2 = nn.Linear(32, 8)
    self.model_name = 'TSN_Flow_2019-01-23_17-06-15.pth'
    # self._load_tsn_flow_weight()
    self._load_pretrained_model(self.model_name)
def load_net(RGBweights, Flowweights):
    """Populate the global RGB and Flow TSN networks from two checkpoints.

    Args:
        RGBweights: path to the RGB-stream checkpoint.
        Flowweights: path to the optical-flow-stream checkpoint.
    """
    global RGBnet
    global Flownet
    global num_class
    # ---------------- RGB stream ----------------
    print('Loading RGB Net......')
    RGBnet = TSN(num_class, 1, 'RGB',
                 base_model='BNInception',
                 consensus_type='avg',
                 dropout=0.7)
    ckpt = torch.load(RGBweights)
    # Drop the leading 'module.' (DataParallel) prefix from each key.
    stripped_rgb = {'.'.join(key.split('.')[1:]): val
                    for key, val in list(ckpt['state_dict'].items())}
    RGBnet.load_state_dict(stripped_rgb)
    print("model epoch {} best prec@1: {}".format(ckpt['epoch'],
                                                  ckpt['best_prec1']))
    # ---------------- Flow stream ----------------
    print('Loading Flow Net......')
    Flownet = TSN(num_class, 1, 'Flow',
                  base_model='BNInception',
                  consensus_type='avg',
                  dropout=0.7)
    ckpt = torch.load(Flowweights)
    print("model epoch {} best prec@1: {}".format(ckpt['epoch'],
                                                  ckpt['best_prec1']))
    stripped_flow = {'.'.join(key.split('.')[1:]): val
                     for key, val in list(ckpt['state_dict'].items())}
    Flownet.load_state_dict(stripped_flow)
def __init__(self):
    """Build the TSN_BIT head: a TSN backbone plus a 101→32→8 classifier.

    Unlike the sibling variants, this constructor loads no pretrained
    weights.
    """
    super(TSN_BIT, self).__init__()
    # num_class / num_segments / modality / arch / crop_fusion_type are
    # module-level globals defined outside this view.
    self.tsn = TSN(num_class,
                   num_segments=num_segments,
                   modality=modality,
                   base_model=arch,
                   consensus_type=crop_fusion_type,
                   dropout=0.7)
    self.activation = nn.LeakyReLU()
    # NOTE(review): assumes the backbone emits 101 features — confirm.
    self.fc1 = nn.Linear(101, 32)
    self.fc2 = nn.Linear(32, 8)
def init_model(num_classes, new_length, args):
    """Construct and return a TSN model.

    Args:
        num_classes: number of output classes.
        new_length: number of consecutive frames per segment snippet.
        args: namespace providing ``num_segments``, ``modality``, ``arch``
            and ``consensus_type``.

    Returns:
        The freshly constructed TSN model.

    The original version also computed crop/scale sizes, normalization
    stats, optimizer policies, an augmentation pipeline, and a cropping
    transform — none of which were used or returned. That dead code is
    removed; callers that need those values can read them off the model.
    """
    model = TSN(num_classes, args.num_segments, args.modality,
                base_model=args.arch,
                new_length=new_length,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)
    return model
def main():
    """Evaluate a trained Paddle TSN on the test split and log average accuracy."""
    args = parser.parse_args()
    # Dataset name -> number of classes.
    if args.dataset == 'ucf101':
        args.num_class = 101
    elif args.dataset == 'hmdb51':
        args.num_class = 51
    elif args.dataset == 'kinetics':
        args.num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        model = TSN(args.num_class, args.num_segments, args.modality,
                    args.arch, dropout=0)
        # Propagate the model's preprocessing settings into args for the reader.
        args.short_size = model.scale_size
        args.target_size = model.crop_size
        args.input_mean = model.input_mean
        # NOTE(review): `* 3` triples input_std — or repeats it three times if
        # it is a list. Confirm which is intended.
        args.input_std = model.input_std * 3
        state_dict = fluid.dygraph.load_dygraph(args.load_path)[0]
        model.set_dict(state_dict)
        test_reader = KineticsReader('test', args, args.test_list).create_reader()
        log = open(args.log_path, 'w')
        model.eval()
        avg_acc = AverageMeter()
        for batch_id, data in enumerate(test_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True
            out, acc = model(img, label)
            # Weight the running average by the batch size.
            avg_acc.update(acc.numpy()[0], label.shape[0])
            if (batch_id + 1) % args.print_freq == 0:
                output = 'Test batch_id:{} | acc {} | avg acc:{}'.format(
                    batch_id + 1, acc.numpy()[0], avg_acc.avg)
                print(output)
                log.write(output + '\n')
                log.flush()
        output = 'Test Avg acc:{}'.format(avg_acc.avg)
        print(output)
        log.write(output + '\n')
        log.flush()
        log.close()
def eval(args):
    """Validate a trained TSN on the UCF-101 validation split and print mean accuracy.

    NOTE(review): shadows the builtin ``eval`` — consider renaming in a
    follow-up (kept here since callers may use this name).
    """
    # parse config
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    # print_configs(val_config, "Valid")
    with fluid.dygraph.guard():
        val_model = TSN(args.batch_size, 32)
        # Invert the saved name->id mapping to id->name (built but unused here).
        label_dic = np.load('work/UCF-101_jpg/label_dir.npy',
                            allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}
        # get infer reader
        val_reader = UCFReader('valid', val_config).create_reader()
        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()
        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()
        acc_list = []
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label = fluid.layers.reshape(label, [args.batch_size, 1])
            label.stop_gradient = True
            out, acc = val_model(img, label)
            print('batch_id=', batch_id, 'acc=', acc.numpy())
            acc_list.append(acc.numpy())
        # Message is Chinese for "validation accuracy:" — runtime string, kept verbatim.
        print("验证集准确率为:{}".format(np.mean(acc_list)))
def infer(args):
    """Run inference with a trained TSN and print actual vs. predicted labels."""
    # parse config
    config = parse_config(args.config)
    infer_config = merge_configs(config, 'infer', vars(args))
    # print_configs(infer_config, "Infer")
    with fluid.dygraph.guard():
        infer_model = TSN(args.batch_size, 32)
        # Invert the saved name->id mapping so predicted ids map back to names.
        label_dic = np.load('work/UCF-101_jpg/label_dir.npy',
                            allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}
        # get infer reader
        infer_reader = UCFReader('infer', infer_config).create_reader()
        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()
        para_state_dict, _ = fluid.load_dygraph(weights)
        # print('para_state_dict:', para_state_dict)
        infer_model.load_dict(para_state_dict)
        infer_model.eval()
        for batch_id, data in enumerate(infer_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data])
            img = fluid.dygraph.to_variable(dy_x_data)
            out = infer_model(img)
            # Report only the top-1 prediction of the first sample in the batch.
            label_id = fluid.layers.argmax(out, axis=1).numpy()[0]
            # Message is Chinese for "actual label {}, predicted {}" — runtime
            # string, kept verbatim.
            print("实际标签{}, 预测结果{}".format(y_data, label_dic[label_id]))
# Top-level test-time script: parse args, build a single-segment TSN, and
# load a DataParallel checkpoint with its 'module.' prefix stripped.
args = parser.parse_args()
# Dataset name -> number of classes.
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
elif args.dataset == 'VideoNet':
    num_class = 353
else:
    raise ValueError('Unknown dataset ' + args.dataset)
net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)
checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))
# Strip the leading 'module.' (DataParallel) prefix from every key.
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)
if args.test_crops == 1:
    # NOTE(review): this statement is truncated in this chunk — the Compose
    # argument list continues beyond the visible source.
    cropping = torchvision.transforms.Compose([
# NOTE(review): this chunk begins mid-way through an accuracy() helper — the
# enclosing function header (and `pred`/`target`/`topk`/`batch_size`) are
# outside this view.
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
    # Count correct predictions within the top-k rows, as a percentage.
    correct_k = correct[:k].view(-1).float().sum(0)
    res.append(correct_k.mul_(100.0 / batch_size))
return res
# Top-level test script: TRN-style consensus uses all test segments, other
# consensus types collapse to a single segment.
categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
num_class = len(categories)
net = TSN(num_class,
          args.test_segments if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1,
          args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          img_feature_dim=args.img_feature_dim,
          )
checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))
# Strip the leading 'module.' (DataParallel) prefix from every key.
base_dict = {'.'.join(k.split('.')[1:]): v
             for k, v in list(checkpoint['state_dict'].items())}
net.load_state_dict(base_dict)
if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    # NOTE(review): the 10-crop branch is truncated in this chunk.
# NOTE(review): this chunk begins mid-way through an accuracy() helper — the
# enclosing function header (and `pred`/`target`/`topk`/`batch_size`) are
# outside this view.
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
    # Count correct predictions within the top-k rows, as a percentage.
    correct_k = correct[:k].view(-1).float().sum(0)
    res.append(correct_k.mul_(100.0 / batch_size))
return res
# Top-level script: build a 27-class RGBFlow TSN, serialize the whole model,
# and introspect its backbone layers.
categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
    "emmanuelle", "RGBFlow")
num_class = len(categories)
originalNet = TSN(
    27,
    args.test_segments if args.consensus_type in ['MLP'] else 1,
    "RGBFlow",
    base_model=args.arch,
    consensus_type=args.consensus_type,
    img_feature_dim=args.img_feature_dim,
)
# Save the full model object (not just its state_dict).
torch.save(originalNet, "emmanuelle.pth")
emmanuelleNet = originalNet.base_model
print(
    "-----------------------------------------------------------------------------------------------------------------"
)
# print("Emmanuelle Net:", type(emmanuelleNet))
print("Children:", len(list(emmanuelleNet.named_children())))
for name, child in emmanuelleNet.named_children():
    print(" ", name.ljust(30), ":", child)
# NOTE(review): the _op_list loop is truncated in this chunk.
for op in emmanuelleNet._op_list:
# Top-level script: load a pretrained TSN for the dataset named in args,
# reading the category list from a bundled text file.
parser.add_argument('--weight', type=str)
args = parser.parse_args()
# Get dataset categories.
categories_file = 'pretrain/{}_categories.txt'.format(args.dataset)
categories = [line.rstrip() for line in open(categories_file, 'r').readlines()]
num_class = len(categories)
# Moments checkpoints were trained on InceptionV3; everything else on BNInception.
args.arch = 'InceptionV3' if args.dataset == 'moments' else 'BNInception'
# Load model.
net = TSN(num_class,
          args.test_segments,
          args.modality,
          base_model=args.arch,
          consensus_type=args.consensus_type,
          img_feature_dim=args.img_feature_dim,
          print_spec=False)
weights = args.weight
checkpoint = torch.load(weights)
#print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
# Strip the leading 'module.' (DataParallel) prefix from every key.
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)
net.cuda().eval()
def main():
    """Train/evaluate a TSN model: build the network, optionally resume from
    a checkpoint, construct train/val loaders, and run the epoch loop."""
    torch.set_printoptions(precision=6)
    global args, best_prec1
    args = parser.parse_args()
    # Dataset name -> number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    """Build the model (translated from the original Chinese notes):
    num_class is the class count; args.num_segments is how many snippets a
    video is split into (the paper's K, default 3); args.modality selects the
    input type (RGB, Flow, ...); args.arch selects the backbone (resnet101,
    BNInception, ...); args.consensus_type selects how snippet outputs are
    fused (e.g. avg); args.dropout is the dropout rate.
    """
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    """(Translated from Chinese.) DataParallel enables multi-GPU training.
    args.resume controls resuming from a checkpoint: either None (default) or
    the path to a saved .pth file; torch.load reads the checkpoint and
    load_state_dict initializes the model's parameters from it.
    """
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): this prints args.evaluate where args.resume is
            # almost certainly intended — flagged, not changed here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    """(Translated from Chinese.) Data loading: TSNDataSet subclasses
    torch.utils.data.Dataset (via __init__/__getitem__) and yields a
    (Tensor, int label) pair per sample; DataLoader batches those pairs.
    batch_size and shuffle are the usual knobs.
    """
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=3,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=3,
        pin_memory=True)
    """(Translated from Chinese.) Training: define loss and optimizer, then
    train and periodically validate/save. adjust_learning_rate scales the lr
    by 0.1 at the epochs listed in args.lr_steps; train() runs one epoch;
    validate() evaluates; args.eval_freq controls how often the model is
    validated and checkpointed via save_checkpoint (torch.save under the hood).
    """
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    '''
    optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    '''
    # try Adam instead.
    optimizer = torch.optim.Adam(policies, args.lr)
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
# Top-level test-time script: map the dataset name to its class count, build
# a single-segment TSN, and load a DataParallel checkpoint with the
# 'module.' key prefix stripped.
_DATASET_CLASSES = {
    'ucf101': 101,
    'hmdb51': 51,
    'kinetics': 400,
    'meitu': 50,
}
if args.dataset not in _DATASET_CLASSES:
    raise ValueError('Unknown dataset ' + args.dataset)
num_class = _DATASET_CLASSES[args.dataset]
net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          before_softmax=True,
          dropout=args.dropout)
checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))
# Drop the leading 'module.' component of every state-dict key.
base_dict = {}
for key, value in checkpoint['state_dict'].items():
    base_dict['.'.join(key.split('.')[1:])] = value
net.load_state_dict(base_dict)
def main():
    """Build a TSN with attention-map output, load a checkpoint, and run the
    visualization pass over the validation split."""
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()
    # Dataset name -> number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    # Build idx -> class-name map. 'something' class files are one name per
    # line (0-based order); other datasets use "id name" lines with 1-based ids.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {int(line.strip().split(' ')[0])-1:line.strip().split(' ')[1] \
                         for line in content}
    # Record the full option set for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    if not (args.consensus_type == 'lstm' or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    # print(model)
    # input('...')
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # print(model)
            # NOTE(review): prints args.evaluate where args.resume looks intended.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
            # input('...')
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    # NOTE(review): rev_normalize is only assigned on the non-RGBDiff branch
    # but is passed to validate() unconditionally — RGBDiff would raise
    # NameError here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
        # data_length = 5
    # Optional temporal perturbations of the validation frames.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length * args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length * args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    # NOTE(review): shuffle=True on a validation loader is unusual — presumably
    # to visualize a random sample each run; confirm.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            # image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    # val_logger = open(os.path.join(args.result_path, 'test.log'), 'w')
    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader, model, 0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
def main():
    """Train a TSN model (thumos14 and friends): build the network, optionally
    resume, construct PerFrameData loaders, and run the train/val epoch loop."""
    global args, best_prec1
    args = Parse_args()
    log.l.info('Input command:\n ===========> python ' + ' '.join(sys.argv) +
               ' ===========>')
    # Dataset name -> number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'mm':
        num_class = 500
    elif args.dataset == 'thumos14':
        num_class = 21
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    log.l.info(
        '============= prepare the model and model\'s parameters ============='
    )
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        log.l.info(
            '============== train from checkpoint (finetune mode) ================='
        )
        if os.path.isfile(args.resume):
            log.l.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): logs args.evaluate where args.resume looks intended.
            log.l.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            log.l.info(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    log.l.info('============== Now, loading data ... ==============\n')
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    train_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=False,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.data_workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=True,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.data_workers,
        pin_memory=True)
    log.l.info(
        '================= Now, define loss function and optimizer =============='
    )
    # NOTE(review): this class-weight tensor (1 for background, 3 for each
    # foreground class) is computed but never passed to the criterion below.
    weight = torch.from_numpy(np.array([1] + [3] * (num_class - 1)))
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        log.l.info(
            ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.evaluate:
        log.l.info('Need val the data first...')
        validate(val_loader, model, criterion, 0)
    log.l.info(
        '\n\n===================> TRAIN and VAL begins <===================\n')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
# Top-level test-time script: build a single-segment TSN and load a checkpoint
# from the local "weights" directory.
parser.add_argument('--result', type=str, default='result')
args = parser.parse_args()
# Dataset name -> number of classes.
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
else:
    raise ValueError('Unknown dataset '+args.dataset)
net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)
weights = osp.join("weights",args.weights)
checkpoint = torch.load(weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))
# NOTE(review): unlike the sibling scripts, no 'module.' prefix is stripped
# here — presumably this checkpoint was saved without DataParallel; confirm.
net.load_state_dict(checkpoint['state_dict'])
if args.modality != 'RGBDiff':
    normalize = GroupNormalize(net.input_mean, net.input_std)
else:
    normalize = IdentityTransform()
if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    # NOTE(review): the 10-crop branch is truncated in this chunk.
def main():
    """Evaluate a trained TSN checkpoint on the test split.

    Builds the network for the selected dataset, restores weights from
    ``args.checkpoint``, wraps the model in ``DataParallel``, and runs
    ``test`` over a center-crop evaluation loader.

    NOTE(review): relies on module-level names (options, TSN, TSNDataSet,
    Group* transforms, test) defined elsewhere in this file.
    """
    parser = options()
    args = parser.parse_args()

    # Dataset name -> number of output classes.
    class_counts = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400, 'saag01': 2}
    if args.dataset not in class_counts:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = class_counts[args.dataset]

    # Consecutive frames consumed per segment: one still frame for RGB,
    # a five-frame stack for Flow / RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    # Capture preprocessing metadata before DataParallel hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_size = model.input_size
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    cropping = torchvision.transforms.Compose([
        GroupScale(scale_size),
        GroupCenterCrop(input_size),
    ])

    ckpt = torch.load(args.checkpoint)
    start_epoch = ckpt['epoch']
    best_prec1 = ckpt['best_prec1']
    state_dict = ckpt['state_dict']

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    # Frame-filename template differs between appearance and flow modalities.
    frame_tmpl = (args.img_prefix + "_{:05d}" + args.ext
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + "_{}_{:05d}" + args.ext)

    eval_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=args.arch == 'BNInception'),
        ToTorchFormatTensor(div=args.arch != 'BNInception'),
        GroupNormalize(input_mean, input_std),
    ])

    test_dataset = TSNDataSet("",
                              args.test_list,
                              num_segments=args.num_segments,
                              new_length=data_length,
                              modality=args.modality,
                              image_tmpl=frame_tmpl,
                              random_shift=False,
                              transform=eval_transform,
                              custom_prefix=args.custom_prefix)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
parser.add_argument('--flow_prefix', type=str, default='') args = parser.parse_args() if args.dataset == 'ucf101': num_class = 101 elif args.dataset == 'hmdb51': num_class = 51 elif args.dataset == 'kinetics': num_class = 400 else: raise ValueError('Unknown dataset '+args.dataset) net = TSN(num_class, 1, args.modality, base_model=args.arch, consensus_type=args.crop_fusion_type, dropout=args.dropout) checkpoint = torch.load(args.weights) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} net.load_state_dict(base_dict) if args.test_crops == 1: cropping = torchvision.transforms.Compose([ GroupScale(net.scale_size), GroupCenterCrop(net.input_size), ]) elif args.test_crops == 10: cropping = torchvision.transforms.Compose([
def main():
    """Run sliding-window prediction with an (ECO-style) TSN model.

    Parses CLI args, prints the environment/configuration, restores a
    checkpoint (CUDA or CPU depending on the module-level `_CUDA` flag),
    builds a windowed validation dataset, and calls `predict` on it.

    NOTE(review): depends on module-level globals (`parser`, `_CUDA`,
    `_WINDOW_SIZE`, `_WINDOW_STRIDE`, `STATS_TOT_WINDOWS`, `collate_fn`,
    `predict`, `print_model`) defined elsewhere in this file.
    """
    global args
    args = parser.parse_args()

    # Dump the runtime environment and every parsed argument for the log.
    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch+" Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")

    # Per-dataset class count and zero-padded frame-filename format.
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:06d}.jpg"  # Format for THUMOS14 videos
        # rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:04d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing metadata before DataParallel hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()  # CUDA
        # NOTE(review): in the flattened source the placement of this call is
        # ambiguous; it appears to run only on the CUDA path — confirm.
        print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model)  # CPU

    print("pretrained_parts: ", args.pretrained_parts)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            if _CUDA:
                checkpoint = torch.load(args.resume)  # CUDA
            else:
                checkpoint = torch.load(args.resume, map_location='cpu')  # CPU
            # Older checkpoints may lack the stored learning rate; fall back
            # to asking the user interactively.
            # if not checkpoint['lr']:
            if "lr" not in checkpoint.keys():
                args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
                args.lr = float(args.lr)
            else:
                args.lr = checkpoint['lr']
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
                   .format(args.resume, checkpoint['epoch'], args.lr)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        print("Please specify the checkpoint to pretrained model")
        return

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        #input_mean = [0,0,0] #for debugging
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff works on frame differences, so no mean/std normalization.
        normalize = IdentityTransform()

    # Frames consumed per segment: 1 for RGB, a 5-frame stack for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    end = time.time()  # wall-clock start for the total-prediction-time stat
    # data_loader = torch.utils.data.DataLoader(
    dataset = TSNDataSet("", args.val_list,
                         num_segments=args.num_segments,
                         new_length=data_length,
                         modality=args.modality,
                         image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format,
                         random_shift=False,
                         transform=torchvision.transforms.Compose([
                             GroupScale(int(scale_size)),
                             GroupCenterCrop(crop_size),
                             Stack(roll=True),
                             ToTorchFormatTensor(div=False),
                             #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')),
                             #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')),
                             normalize,
                         ]),
                         test_mode=True,
                         window_size=_WINDOW_SIZE,
                         window_stride=_WINDOW_STRIDE);
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              collate_fn=collate_fn)
    # criterion = torch.nn.CrossEntropyLoss().cuda()
    # predict(data_loader, model, criterion, 0)
    # NOTE(review): prediction iterates the dataset directly, not the
    # DataLoader built above — presumably `predict` does its own batching.
    predict(dataset, model, criterion=None, iter=0)
    # profile_model(model)
    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
# Select the number of output classes for the chosen dataset.
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
elif args.dataset == 'virat':
    num_class = 8
else:
    raise ValueError('Unknown dataset ' + args.dataset)

# ipdb.set_trace()
# Single-segment TSN used for per-crop test-time inference.
net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)

# Debug dump: enumerate every parameter with its trainability and shape.
param_count = 0
for n, param in net.named_parameters():
    param_count += 1
    print(param_count, n, param.requires_grad, param.size())

# Test-time cropping strategy: 1 = single center crop, 10 = over-sampled
# corner/center crops plus flips.
# NOTE(review): this chunk appears truncated — other crop counts are
# presumably handled (or rejected) in code outside this view.
if args.test_crops == 1:
    cropping = transforms.Compose(
        [GroupScale(net.scale_size),
         GroupCenterCrop(net.input_size)])
elif args.test_crops == 10:
    cropping = transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset( args.dataset, args.modality) num_class = len(categories) args.store_name = '_'.join([ 'MFF', args.dataset, args.modality, args.arch, 'segment%d' % args.num_segments, '%df1c' % args.num_motion ]) model = TSN(num_class, args.num_segments, args.modality, base_model=args.arch, consensus_type=args.consensus_type, dropout=args.dropout, num_motion=args.num_motion, img_feature_dim=args.img_feature_dim, partial_bn=not args.no_partialbn, dataset=args.dataset) # load pre trained weights checkpoint = torch.load( "pretrained_models/MFF_jester_RGBFlow_BNInception_segment4_3f1c_best.pth.tar" ) print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) base_dict = { '.'.join(k.split('.')[2:]): v for k, v in list(checkpoint['state_dict'].items())[:-6]
# Find the first unused experiment index under `logdir` so repeated runs
# never overwrite earlier results.
ei = 0
while(os.path.exists(logdir + '/%d/' % ei)):
    ei = ei + 1

#################################
# main loop
#################################
# One independent training run per experiment repetition; each gets its own
# log subdirectory ./<logdir>/<expdir>/<ei>/<di>/.
for di in range(0, args.num_experiments):
    p['logdir'] = './%s/%s/%d/%d/' % (args.logdir, expdir, ei, di)
    if(not os.path.exists(p['logdir'])):
        os.makedirs(p['logdir'])
    # Rebuild the model from scratch for every repetition.
    model = []
    model = TSN(p, dataset_train)
    model = model.cuda(device)
    optim = get_optimizer(args, model)

    # Track the best validation score (top-1 cause+effect), using the
    # top-2 sum as a tie-break criterion.
    max_perf_val = 0.0
    max_perf_aux = 0.0
    for epoch in range(0, args.num_epochs):
        stats_train = process_epoch('train', epoch, p, dataloader_train, model, optim)
        stats_val = process_epoch('val', epoch, p, dataloader_val, model)
        perf_val = stats_val['top1.cause'] + stats_val['top1.effect']
        perf_val_aux = stats_val['top2.cause'] + stats_val['top2.effect']
        if(perf_val >= max_perf_val):
            if(perf_val_aux >= max_perf_aux):
                # NOTE(review): loop body looks truncated here — the
                # best-model bookkeeping presumably continues outside
                # this view (e.g. saving a checkpoint).
                max_perf_val = perf_val
def main():
    """Train (or evaluate) a TSN model on frames under ``UCF-Frames``.

    Parses CLI arguments from the module-level ``parser``, builds the TSN
    network for the selected dataset, optionally resumes from a checkpoint,
    then runs the train/validate loop, checkpointing the best top-1 model.

    NOTE(review): relies on module-level names (parser, TSN, TSNDataSet,
    Group* transforms, train, validate, adjust_learning_rate,
    save_checkpoint, cudnn) defined elsewhere in this file.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing metadata before DataParallel hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously formatted args.evaluate, which
            # is unrelated to the file just loaded; args.resume is correct.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # --- Data loading ---
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff works on frame differences, so no mean/std normalization.
        normalize = IdentityTransform()

    # Frames consumed per segment: 1 for RGB, a 5-frame stack for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
parser.add_argument('--consensus_type', type=str, default='TRNmultiscale') parser.add_argument('--weights', type=str) args = parser.parse_args() # Get dataset categories. categories_file = 'pretrain/{}_categories.txt'.format(args.dataset) categories = [line.rstrip() for line in open(categories_file, 'r').readlines()] num_class = len(categories) args.arch = 'InceptionV3' if args.dataset == 'moments' else 'BNInception' # Load model. net = TSN(2, args.test_segments, args.modality, base_model=args.arch, consensus_type=args.consensus_type, img_feature_dim=args.img_feature_dim, print_spec=False) net = torch.nn.DataParallel(net) import glob checkpoint_names = glob.glob('checkpoint_*.pth') save_acc_dict = {} best_acc = 0.0 best_cp = None torch.manual_seed(1111) import pickle from tqdm import tqdm for checkpoint_name in tqdm(checkpoint_names):
def main():
    """Evaluate a 4-class ECO model on a validation list, printing predictions.

    Restores a state dict from ``args.model_path``, zero/xavier-initializing
    any parameters missing from the checkpoint, then runs the model over the
    validation loader and prints top-1 predictions next to the labels.

    Fixes vs. original:
    - inference now runs under ``torch.no_grad()`` (the original built an
      autograd graph for every batch during pure evaluation);
    - the loop variable no longer shadows the builtin ``input``;
    - missing state-dict entries are created with ``torch.zeros_like`` so
      their dtype matches the model's parameters (the original allocated
      float64 ``DoubleTensor`` buffers for a float32 model);
    - dead locals (``input_var``/``target_var``, unused loop index) removed.

    NOTE(review): relies on module-level names (args, TSN, TSNDataSet,
    Group* transforms, constant_, xavier_uniform_, cudnn) defined elsewhere
    in this file.
    """
    global args, best_prec1

    num_class = 4
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class, args.num_segments, args.pretrained_parts, 'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    # Capture preprocessing metadata before DataParallel hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers support per-parameter options: policies is an iterable of
    # dicts, each with a 'params' key plus per-group optimization options.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    model_dir = args.model_path
    new_state_dict = torch.load(model_dir)['state_dict']

    # Parameters present in the model but absent from the checkpoint must be
    # (re-)initialized before load_state_dict, which requires a full mapping.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")
    for k in un_init_dict_keys:
        # zeros_like keeps the dtype/size of the target parameter
        # (the original used a float64 DoubleTensor here).
        new_state_dict[k] = torch.zeros_like(model_dict[k])
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)
    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1  # one still frame per segment (RGB)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality='RGB',
        image_tmpl=rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()
    # Gradients are never needed for prediction; no_grad avoids building
    # an autograd graph and keeps memory flat.
    with torch.no_grad():
        for frames, target in val_loader:
            target = target.cuda()
            output = model(frames)
            _, pred = output.data.topk(1, 1, True, True)
            print(pred, target)

    print('done')
def main():
    """Train (or evaluate) a TSN action-recognition model.

    Parses CLI arguments from the module-level ``parser``, builds the TSN
    network for the selected dataset, optionally resumes from a checkpoint,
    then runs the train/validate loop, checkpointing the best top-1 model.

    NOTE(review): relies on module-level names (parser, TSN, TSNDataSet,
    Group* transforms, train, validate, adjust_learning_rate,
    save_checkpoint, cudnn) defined elsewhere in this file.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing metadata before DataParallel hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously formatted args.evaluate, which
            # is unrelated to the file just loaded; args.resume is correct.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # --- Data loading ---
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff works on frame differences, so no mean/std normalization.
        normalize = IdentityTransform()

    # Frames consumed per segment: 1 for RGB, a 5-frame stack for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)