def get_set_loader():
    """Build the train/test DataLoaders for the TSN dataset.

    Relies on module-level configuration: _MODALITY, _IMAGE_SIZE, _TRAIN_LIST,
    _TEST_LIST, _BATCH_SIZE, _TEST_BATCH_SIZE, _NUM_W and my_collate.

    Returns:
        (train_loader, test_loader) tuple of torch DataLoader objects.
    """
    # 'rgb' and 'flow' are the "pure" single-stream modalities
    # (was the `True if ... else False` anti-idiom).
    pure = _MODALITY in ('rgb', 'flow')
    # Modalities whose frames are stored as img_XXXXX.jpg; everything else
    # uses the two-channel flow naming scheme flow_{x|y}_XXXXX.jpg.
    modlist = ["rgb", "rgbdsc", "flyflow", "edr1"]
    image_tmpl = ("img_{:05d}.jpg" if _MODALITY in modlist
                  else "flow_" + "{}_{:05d}.jpg")

    # Train: random crop for augmentation. Test: deterministic center crop.
    train_transform = torchvision.transforms.Compose([
        GroupScale(size=256),
        GroupRandomCrop(size=_IMAGE_SIZE),
        Stack(modality=_MODALITY),
        ToTorchFormatTensor(pure=pure)
    ])
    test_transform = torchvision.transforms.Compose([
        GroupScale(size=256),
        GroupCenterCrop(size=_IMAGE_SIZE),
        Stack(modality=_MODALITY),
        ToTorchFormatTensor(pure=pure)
    ])

    def _make_loader(list_file, transform, batch_size):
        # One segment of 64 consecutive frames per sample.
        dataset = TSNDataSet("", list_file,
                             num_segments=1,
                             new_length=64,
                             modality=_MODALITY,
                             image_tmpl=image_tmpl,
                             transform=transform)
        # NOTE(review): shuffle=True is kept for BOTH loaders to preserve the
        # original behaviour, though evaluation is usually unshuffled.
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=_NUM_W,
                                           collate_fn=my_collate)

    train_loader = _make_loader(_TRAIN_LIST, train_transform, _BATCH_SIZE)
    test_loader = _make_loader(_TEST_LIST, test_transform, _TEST_BATCH_SIZE)
    return train_loader, test_loader
def run(self):
    """Score one pre-extracted test chunk and store the averaged class
    scores into the module-level rgb_whole_pred / opf_whole_pred dicts,
    keyed by the global StartFrame.
    """
    # Test-time cropping: single center crop or 10-crop oversampling.
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(self.net.scale_size),
            GroupCenterCrop(self.net.input_size)
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(self.net.input_size, self.net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    # Frames for the current chunk are dumped under record/test/ by an
    # upstream step; RGB and optical-flow chunks live in separate dirs.
    if self.modality == 'RGB':
        root_path = 'record/test/temp_chunk/'
        test_list = 'rgb_video_test_list.txt'
    else:
        root_path = 'record/test/temp_opf/'
        test_list = 'opf_video_test_list.txt'

    # new_length: 1 frame per segment for RGB, 5 stacked frames for flow.
    data_set = TSNDataSet(
        root_path,
        test_list,
        num_segments=args.test_segments,
        new_length=1 if self.modality == "RGB" else 5,
        modality=self.modality,
        image_tmpl="frame_{:06d}.jpg"
        if self.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg",
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=False),
            ToTorchFormatTensor(div=True),
            GroupNormalize(self.net.input_mean, self.net.input_std),
        ]))
    data_loader = torch.utils.data.DataLoader(data_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    self.net.cuda()
    self.net.eval()

    data_gen = enumerate(data_loader)
    output = []
    for i, (keys, data, label) in data_gen:
        # Split the stacked segments along the channel dim; score each
        # segment independently via eval_video.
        a = data.chunk(args.test_segments, 1)
        res = []
        for j in a:
            rst = self.eval_video((i, j, label))
            res.append(rst)
        output.append((res, label[0]))

    # NOTE(review): only output[0] is read, i.e. the first batch's
    # per-segment scores are averaged — confirm the loader yields one batch.
    if self.modality == 'RGB':
        rgb_whole_pred[str(StartFrame)] = np.mean(output[0][0], axis=0)
    else:
        opf_whole_pred[str(StartFrame)] = np.mean(output[0][0], axis=0)
    return
def main():
    """Train (or evaluate with --evaluate) a TRN model; dataset paths are
    resolved via datasets_video.return_dataset."""
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.test_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.
    # Two-stream ConvNets take one of four input modalities: a single RGB
    # image, stacked RGB difference, stacked optical flow, or stacked warped
    # optical flow. RGB encodes static appearance at one time point; RGB
    # difference captures appearance change (motion-salient regions);
    # (warped) optical flow captures motion, with warping suppressing
    # background/camera motion so flow concentrates on the actor.
    # RGBDiff is normalized implicitly by the differencing, hence Identity.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # BNInception/InceptionV3 backbones expect BGR input in the 0-255 range,
    # hence channel roll and no division by 255 for those architectures.
    roll = args.arch in ['BNInception', 'InceptionV3']

    # Division between train and val set
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=not roll),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=not roll),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # FIX: the training log file was previously opened and never closed.
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name),
              'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
def main():
    """Train (or evaluate with --evaluate) a TSN model on UCF101 / HMDB51 /
    Kinetics, with frames rooted at "UCF-Frames"."""
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff normalizes implicitly through frame differencing.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # RGB/RGBDiff frames are plain numbered JPEGs; flow frames use the
    # "<prefix><x|y>_<idx>.jpg" naming scheme.
    image_tmpl = ("{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + "{}_{:05d}.jpg")
    # BNInception expects BGR input without /255 scaling.
    roll = args.arch == 'BNInception'

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=image_tmpl,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=image_tmpl,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
# NOTE(review): this chunk begins mid-expression — the matching
# `if args.test_crops == 1: cropping = torchvision.transforms.Compose([...`
# opening lies outside the visible source; indentation below reconstructs
# the apparent structure.
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    # 10-crop oversampling: 4 corners + center, plus horizontal flips.
    cropping = torchvision.transforms.Compose([
        GroupOverSample(net.input_size, net.scale_size)
    ])
else:
    raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops))

# Validation loader: one video per batch with full test-time augmentation.
data_loader = torch.utils.data.DataLoader(
    TSNDataSet(args.root_path, args.val_list, num_segments=args.test_segments,
               new_length=1 if args.modality == "RGB" else 5,
               modality=args.modality,
               image_tmpl=prefix,
               test_mode=True,
               transform=torchvision.transforms.Compose([
                   cropping,
                   # BNInception/InceptionV3 expect BGR, 0-255 range input.
                   Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                   ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                   GroupNormalize(net.input_mean, net.input_std),
               ])),
    batch_size=1, shuffle=False,
    num_workers=args.workers * 2, pin_memory=True)

# Map worker indices onto GPU ids (defaults to 0..workers-1).
if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))

#net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
if "RGBFlow" == 'RGB': data_length = 1 elif "RGBFlow" in ['Flow', 'RGBDiff']: data_length = 5 elif "RGBFlow" == 'RGBFlow': data_length = args.num_motion data_loader = torch.utils.data.DataLoader(TSNDataSet( args.root_path, args.val_list, num_segments=args.test_segments, new_length=data_length, modality="RGBFlow", image_tmpl=prefix, dataset="emmanuelle", test_mode=True, transform=torchvision.transforms.Compose([ cropping, Stack(roll=(args.arch in ['BNInception', 'InceptionV3']), isRGBFlow=("RGBFlow" == 'RGBFlow')), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), GroupNormalize(originalNet.input_mean, originalNet.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=False) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)]
def main():
    """Visualize attention maps of a trained TSN model over the validation
    set (results and logs go to args.result_path)."""
    global args, best_prec1, class_to_name
    # --class_index is only needed by this visualization entry point, so it
    # is registered here rather than with the shared parser options.
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build idx -> human-readable class-name map. 'something' index files
    # list one name per line (0-based by position); the others contain
    # "<1-based-id> <name>" pairs.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            int(line.strip().split(' ')[0]) - 1: line.strip().split(' ')[1]
            for line in content
        }

    # Persist the run configuration for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    # lstm_out_type only applies to (conv-)LSTM consensus.
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # NOTE(review): policies / train_augmentation are computed but unused in
    # this visualization-only entry point.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # BUG FIX: rev_normalize was undefined on this branch, making the
        # validate(...) call below raise NameError for RGBDiff input; since
        # no normalization is applied here, its inverse is the identity too.
        rev_normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
        # data_length = 5

    # Optional frame-order perturbations for ablating temporal modeling.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg"
            if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix +
            "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        # shuffled so the visualizations sample varied videos
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
# NOTE(review): fragment — this chunk begins inside the
# `if args.test_crops == 1:` Compose list; the opening lies outside the
# visible source. `net`, `args` and `normalize` are defined elsewhere.
    GroupScale(net.scale_size),
    GroupCenterCrop(net.input_size),
])
elif args.test_crops == 10:
    # 10-crop oversampling (corners + center, with flips).
    cropping = torchvision.transforms.Compose([
        GroupOverSample(net.input_size, net.scale_size)
    ])
else:
    raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops))

# Test loader: one video per batch; per-video scores are saved by the
# dataset itself (save_scores=True).
data_loader = torch.utils.data.DataLoader(
    TSNDataSet("", args.test_list, num_segments=args.test_segments,
               new_length=1 if args.modality == "RGB" else 5,
               modality=args.modality,
               image_tmpl="im{}.jpg",
               test_mode=True,
               save_scores=True,
               transform=torchvision.transforms.Compose([
                   cropping,
                   # BNInception expects BGR, 0-255 range input.
                   Stack(roll=args.arch == 'BNInception'),
                   ToTorchFormatTensor(div=args.arch != 'BNInception'),
                   normalize,
               ])),
    batch_size=1, shuffle=False,
    num_workers=args.workers, pin_memory=True)

net = net.cuda()
net.eval()

data_gen = enumerate(data_loader)
total_num = len(data_loader.dataset)
output = []
def main():
    """Evaluate a 4-class ECO model checkpoint (args.model_path) on
    args.val_list, printing per-sample top-1 predictions."""
    global args, best_prec1
    num_class = 4
    # Frames are unpadded numbered JPEGs (1.jpg, 2.jpg, ...).
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers support per-parameter options: pass an iterable of dicts,
    # each defining a parameter group with a 'params' key plus any
    # optimizer keyword arguments to use for that group.
    policies = model.get_optim_policies()
    # NOTE(review): train_augmentation is unused in this eval-only script.
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    model_dir = args.model_path
    new_state_dict = torch.load(model_dir)['state_dict']

    # Keys present in the model but missing from the checkpoint get freshly
    # initialized: BN weights -> 1, other weights -> xavier, biases -> 0.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")
    for k in un_init_dict_keys:
        # NOTE(review): DoubleTensor allocated here — load_state_dict copies
        # values into the model's existing (float) parameters, so the model
        # dtype should be preserved; confirm on the target torch version.
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)
    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality='RGB',
        image_tmpl=rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            # ECO/BNInception-style preprocessing: BGR, 0-255 range.
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()
    # NOTE(review): no torch.no_grad() here — autograd state is tracked
    # during this inference loop (wasteful but does not change predictions).
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input_var = input
        target_var = target  # unused
        output = model(input_var)
        # Top-1 class index per sample.
        _, pred = output.data.topk(1, 1, True, True)
        print(pred, target)
    print('done')
def get_pred(video_path, caption_path, opt):
    """Run a pretrained TRN checkpoint over the videos described by
    video_path/caption_path and return (prob_all, pred_all): per-sample
    top-k probabilities and predicted class indices.

    NOTE(review): targets an old PyTorch API (Variable(volatile=True),
    F.softmax without dim) — confirm the pinned torch version before reuse.
    """
    # options
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    # parser.add_argument('dataset', type=str, choices=['something','jester','moments','charades'])
    # parser.add_argument('modality', type=str, choices=['RGB', 'Flow', 'RGBDiff'])
    parser.add_argument('--dataset', type=str, default='somethingv2')
    parser.add_argument('--modality', type=str, default='RGB')
    parser.add_argument(
        '--weights',
        type=str,
        default=
        'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar'
    )
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', type=str, default=None)
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=10)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type',
                        type=str,
                        default='TRNmultiscale',
                        choices=['avg', 'TRN', 'TRNmultiscale'])
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--img_feature_dim', type=int, default=256)
    parser.add_argument(
        '--num_set_segments',
        type=int,
        default=1,
        help='TODO: select multiply set of n-frames from a video')
    parser.add_argument('--softmax', type=int, default=0)
    args = parser.parse_args()

    def accuracy(output, target, topk=(1, )):
        """Computes the precision@k for the specified values of k"""
        # Despite the name, this returns the top-k (probabilities, class
        # indices) rather than an accuracy value; `target`/`batch_size`
        # are effectively unused beyond the size() call.
        maxk = max(topk)
        batch_size = target.size(0)
        prob, pred = output.topk(maxk, 1, True, True)
        prob = prob.t().data.numpy().squeeze()
        pred = pred.t().data.numpy().squeeze()
        return prob, pred

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, opt)
    num_class = len(categories)

    # TRN consensus consumes all test segments; 'avg' uses a single segment.
    net = TSN(num_class,
              args.test_segments
              if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1,
              args.modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              img_feature_dim=args.img_feature_dim,
              opt=opt)

    # NOTE(review): bare except — any load failure triggers the fallback
    # path below (resolving the weights relative to the project root).
    try:
        checkpoint = torch.load(args.weights)
    except:
        args.weights = os.path.join(opt.project_root, 'scripts/Eval/',
                                    args.weights)
        checkpoint = torch.load(args.weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    # Strip the leading 'module.' prefix left by DataParallel checkpoints.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    # Test-time cropping: single center crop or 10-crop oversampling.
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        video_path,
        caption_path,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=prefix,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            # BNInception/InceptionV3 expect BGR, 0-255 range input.
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers * 2,
        pin_memory=True)

    # NOTE(review): `devices` is computed but unused — the DataParallel line
    # consuming it is commented out below.
    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))
    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()

    data_gen = enumerate(data_loader)

    output = []

    def eval_video(video_data):
        # Score a single (index, data, label) triple; returns per-segment
        # class scores shaped (segments_or_crops, 1, num_class).
        i, data, label = video_data
        num_crop = args.test_crops
        # Channels per frame stack: RGB=3, Flow=2x5, RGBDiff=3x(5+1).
        if args.modality == 'RGB':
            length = 3
        elif args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18
        else:
            raise ValueError("Unknown modality " + args.modality)
        input_var = torch.autograd.Variable(data.view(-1, length,
                                                      data.size(2),
                                                      data.size(3)),
                                            volatile=True)
        rst = net(input_var)
        if args.softmax == 1:
            # take the softmax to normalize the output to probability
            rst = F.softmax(rst)
        rst = rst.data.cpu().numpy().copy()
        if args.crop_fusion_type in ['TRN', 'TRNmultiscale']:
            rst = rst.reshape(-1, 1, num_class)
        else:
            # Average over crops, keep per-segment scores.
            rst = rst.reshape((num_crop, args.test_segments,
                               num_class)).mean(axis=0).reshape(
                                   (args.test_segments, 1, num_class))
        return i, rst, label[0]

    max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset)

    prob_all, pred_all = [], []
    for i, (data, label) in data_gen:
        if i >= max_num:
            break
        rst = eval_video((i, data, label))
        output.append(rst[1:])
        # NOTE(review): topk=(1, 174) hard-codes the somethingv2 class count.
        prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)),
                              label,
                              topk=(1, 174))
        prob_all.append(prob)
        pred_all.append(pred)

    return prob_all, pred_all
def main():
    """Transfer-learn a TRN model: load 339-class MIT Moments weights,
    freeze the CNN backbone, swap the classifier head for the target
    dataset, then train/evaluate."""
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # 339 = class count of the pretrained MIT Moments checkpoint; the
    # classifier head is replaced with num_class outputs further below.
    model = TSN(339,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Freeze the CNN backbone (the first named child) for transfer learning.
    _, cnn = list(model.named_children())[0]
    for p in cnn.parameters():
        p.requires_grad = False

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # remove if not transfer learning
    checkpoint = torch.load('/home/ec2-user/mit_weights.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    # Replace the final linear layer of each consensus sub-module with a
    # fresh num_class-way classifier (assumes the TRN head layout — the
    # last child chain ends in indexable Sequential modules).
    for module in list(
            list(model._modules['module'].children())
        [-1].children())[-1].children():
        module[-1] = nn.Linear(256, num_class)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    model.cuda()

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # BNInception/InceptionV3 expect BGR input without /255 scaling.
    roll = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # NOTE(review): `policies` are printed above, but the optimizer is built
    # from plain model.parameters() — per-group lr/decay multipliers are
    # not actually applied here.
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                8,
                                                gamma=0.1,
                                                last_epoch=-1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # FIX: the training log file was previously opened and never closed.
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name),
              'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            scheduler.step()

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training,
                                 epoch)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
    summary_writer.close()
def main():
    """Train (or evaluate) a TRN regression model — single continuous
    output, best model selected by lowest validation loss."""
    global args, best_loss
    args = parser.parse_args()
    check_rootfolders()

    # NOTE(review): train_dict/val_dict returned here are unused; the
    # loaders below read args.train_dict / args.val_dict instead.
    root_data, train_dict, val_dict = datasets_video.return_dataset(
        args.dataset, args.modality, args.view)
    num_class = 1  # single regression output

    args.store_name = '_'.join([
        'TRN', args.label_name, args.dataset, args.modality, args.view,
        args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # Body view uses plain numbered JPEGs; the other view uses
    # OpenFace-style frame_det BMPs.
    img_tmpl = '{:06d}.jpg' if args.view == 'body' else 'frame_det_00_{:06d}.bmp'

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                fix_all_weights=args.fix_all_weights)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()  # augmentation increase

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # BNInception/InceptionV3 expect BGR input without /255 scaling.
    roll = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        root_data,
        args.train_dict,
        args.label_name,
        num_segments=args.num_segments,
        phase='Train',
        new_length=data_length,
        modality=args.modality,
        image_tmpl=img_tmpl,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        root_data,
        args.val_dict,
        args.label_name,
        num_segments=args.num_segments,
        phase='Validation',
        new_length=data_length,
        modality=args.modality,
        image_tmpl=img_tmpl,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=roll),
            ToTorchFormatTensor(div=not roll),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    elif args.loss_type == 'mse':
        criterion = torch.nn.MSELoss().cuda()
    elif args.loss_type == 'mae':
        # NOTE(review): 'mae' actually maps to SmoothL1 (Huber), not pure L1.
        criterion = torch.nn.SmoothL1Loss().cuda()
    else:  # another loss is mse or mae
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # FIX: the validation log file was previously opened and never closed.
        with open(os.path.join(args.root_log, '%s_val.csv' % args.store_name),
                  'w') as log_val:
            validate(val_loader, model, criterion, 0, log_val)
        return

    # FIX: the training log file was previously opened and never closed.
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name),
              'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                loss = validate(val_loader, model, criterion,
                                (epoch + 1) * len(train_loader), log_training)

                # remember best (lowest) validation loss and save checkpoint
                is_best = loss < best_loss
                best_loss = min(loss, best_loss)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                    }, is_best)
def main():
    """Entry point for ECO/TSN-style training.

    Builds the model, selectively re-initializes weights per architecture,
    constructs train/val (and optionally test) loaders, then runs the
    train/validate loop — or a single test pass when --evaluate is given.
    """
    global args, best_prec1
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch + " Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")
    print(args.mode)

    # Dataset -> (number of classes, frame-filename number format).
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'tinykinetics':
        num_class = 150
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'minikinetics':
        num_class = 150
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                dataset=args.dataset,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)  # , rep_flow=args.rep_flow

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers support per-parameter options: pass an iterable of dicts,
    # each defining a parameter group ('params' key plus per-group options).
    policies = model.get_optim_policies(args.dataset)
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    # Per-architecture: which weights survive ('fc' layers are dropped so
    # they get re-initialized below) and the Stack/ToTensor conventions
    # (roll=True + div=False is the Caffe/BGR 0-255 convention).
    if args.arch == "resnet50":
        # Everything gets re-initialized below.
        new_state_dict = {}
        div, roll = False, True
    elif args.arch in ("resnet34", "Res3D18"):
        new_state_dict = {k: v for k, v in model_dict.items()
                          if 'fc' not in k}
        div, roll = False, True
    elif args.arch in ("TSM", "MS"):
        new_state_dict = {k: v for k, v in model_dict.items()
                          if 'fc' not in k}
        div, roll = True, False
    else:
        # BUG FIX: previously an unsupported arch fell through this chain,
        # leaving new_state_dict/div/roll undefined and crashing later with
        # a NameError; fail fast with a clear message instead.
        raise ValueError("Unsupported architecture: {}".format(args.arch))

    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")
    for k in un_init_dict_keys:
        # NOTE(review): buffers are created as float64 (DoubleTensor) while
        # the model is presumably float32 — confirm load_state_dict casts.
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)
    print("------------------------------------")
    model.load_state_dict(new_state_dict)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 1  # NOTE(review): 1, not the usual 5 — confirm intended

    # Frame filename template is shared by all three loaders.
    image_tmpl = (args.rgb_prefix + rgb_read_format
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + rgb_read_format)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   mode=args.mode,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   mode=args.mode,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    output_list = []
    if args.evaluate:
        # Full-resolution, non-shifted sampling for test-time evaluation.
        input_size = scale_size
        test_loader = torch.utils.data.DataLoader(
            TSNDataSet("", args.val_list,
                       num_segments=args.num_segments,
                       new_length=data_length,
                       modality=args.modality,
                       mode=args.mode,
                       image_tmpl=image_tmpl,
                       random_shift=False,
                       test_mode=True,
                       transform=torchvision.transforms.Compose([
                           GroupFullResSample(scale_size, input_size,
                                              flip=False),
                           Stack(roll=roll),
                           ToTorchFormatTensor(div=div),
                           normalize,
                       ])),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
        prec1, score_tensor = test(test_loader, model, criterion, 0,
                                   temperature=100, num_class=num_class)
        output_list.append(score_tensor)
        fn = 'score_kinetics.pt'
        save_validation_score(output_list, filename=fn)
        print("test score saved in {}".format('/'.join(
            (args.val_output_folder, fn))))
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        temperature = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, score_tensor = validate(val_loader, model, criterion,
                                           (epoch + 1) * len(train_loader),
                                           temperature=temperature)
            output_list.append(score_tensor)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

            # save validation score
            save_validation_score(output_list)
            print("validation score saved in {}".format('/'.join(
                (args.val_output_folder, 'score.pt'))))
def main():
    """Fine-tune a something-v2-pretrained TRN model for binary
    classification with a class-weighted BCE-with-logits loss,
    checkpointing the weights every epoch."""
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()
    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch,
                                args.consensus_type,
                                'segment%d' % args.num_segments])
    print('storing name: ' + args.store_name)

    # num_class from the dataset listing is NOT used here: the classifier
    # head is hard-wired to 2 outputs (binary task).
    model = TSN(2, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Load pretrained TRN weights, stripping the DataParallel "module."
    # prefix, and drop the final fc-fusion layers of every scale: their
    # output dimension does not match the 2-way head.
    checkpoint = torch.load(
        'pretrain/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar',
        map_location='cpu')
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in list(checkpoint['state_dict'].items())}
    for scale in range(7):
        for param in ('bias', 'weight'):
            del base_dict['consensus.fc_fusion_scales.{}.3.{}'.format(
                scale, param)]
    model.load_state_dict(base_dict, strict=False)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in
                                   ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in
                                                ['BNInception',
                                                 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # NOTE: the validation loader is intentionally disabled in this script;
    # only per-epoch training checkpoints are produced.

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # Class 0 is up-weighted (1.2) relative to class 1.
        weight = torch.ones([2]).cuda()
        weight[0] = 1.2
        pos_weight = torch.ones([2]).cuda()
        criterion = torch.nn.BCEWithLogitsLoss(
            weight=weight, pos_weight=pos_weight).cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # NOTE(review): learning rate is hard-coded to 1e-4 for fine-tuning and
    # deliberately ignores args.lr — confirm this is intended.
    optimizer = torch.optim.SGD(policies, 0.0001,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # Snapshot the weights at the start of every epoch (two formats).
        torch.save(model.state_dict(),
                   'checkpoint_bce_20_w12_{}.pth.tar'.format(epoch))
        torch.save(model.state_dict(),
                   'checkpoint_bce_20_w12_{}.pth'.format(epoch))
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)
def main():
    """Train a TSN model (frame template "im{}.jpg") with Visdom logging,
    checkpointing the best top-1 precision.

    Typical configuration: consensus_type=avg, base_model=resnet101,
    dropout=0.5.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size      # e.g. 224
    scale_size = model.scale_size    # e.g. 256 (224 * 256/224)
    input_mean = model.input_mean    # per-modality normalization stats
    input_std = model.input_std
    policies = model.get_optim_policies()
    # get_augmentation() combines GroupMultiScaleCrop (fixed-position crops
    # resized to the input size with bilinear interpolation) and
    # GroupRandomHorizontalFlip.
    train_augmentation = model.get_augmentation()
    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # roll/div depend on whether the backbone is a Caffe-style BNInception
    # model, which expects BGR channel order and 0-255 pixel values.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # see the optim policy
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # generally the base lr here is 1e-3
    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    # BUG FIX: the weights were saved under the key
                    # 'test_crops' (resume reads 'state_dict'), and the
                    # current prec1 was stored as 'best_prec1' instead of
                    # the running best.
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train/evaluate a TSN model, printing its FLOPs/parameter count via
    ptflops before training starts."""
    global args, best_prec1
    # do some pre-processing, such as silencing warnings
    pre_process()
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'streetdance245':
        num_class = 245
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Report model complexity; input to the model is (batch, n_seg*c, h, w).
    from ptflops import get_model_complexity_info
    macs, params = get_model_complexity_info(model, (1, 9, 224, 224),
                                             as_strings=True,
                                             print_per_layer_stat=True,
                                             verbose=True)
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
    # NOTE(review): set_break() appears to pause/stop execution here after
    # the complexity report — confirm whether it should stay in production.
    set_break()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    print('best_prec1:', best_prec1)
def main():
    """Run sliding-window prediction over the validation list with a
    resumed (required) checkpoint, on CPU or CUDA depending on _CUDA."""
    global args
    args = parser.parse_args()
    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch+" Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")
    # Dataset -> (number of classes, frame-filename number format).
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:06d}.jpg"
        # Format for THUMOS14 videos
        # rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:04d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()  # CUDA
    print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model)  # CPU

    print("pretrained_parts: ", args.pretrained_parts)

    # A checkpoint is mandatory for this prediction-only entry point.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            if _CUDA:
                checkpoint = torch.load(args.resume)  # CUDA
            else:
                checkpoint = torch.load(args.resume, map_location='cpu')  # CPU
            # Older checkpoints may lack the 'lr' field; prompt for it.
            # if not checkpoint['lr']:
            if "lr" not in checkpoint.keys():
                args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
                args.lr = float(args.lr)
            else:
                args.lr = checkpoint['lr']
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
                   .format(args.resume, checkpoint['epoch'], args.lr)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        print("Please specify the checkpoint to pretrained model")
        return

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        #input_mean = [0,0,0] #for debugging
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    end = time.time()
    # data_loader = torch.utils.data.DataLoader(
    # Windowed test-mode dataset: roll=True/div=False matches the
    # Caffe-style (BGR, 0-255) input convention.
    dataset = TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')),
                       #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')),
                       normalize,
                   ]), test_mode=True,
                   window_size=_WINDOW_SIZE, window_stride=_WINDOW_STRIDE);
    data_loader = torch.utils.data.DataLoader(dataset,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
        collate_fn=collate_fn)

    # criterion = torch.nn.CrossEntropyLoss().cuda()
    # predict(data_loader, model, criterion, 0)
    # NOTE(review): predict() receives the *dataset*, not the data_loader
    # built above — confirm the loader is intentionally unused here.
    predict(dataset, model, criterion=None, iter=0)
    # profile_model(model)
    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
def main():
    """Train/evaluate a TSN model (including a displacement-map 'CV'
    modality) with SGD and a MultiStepLR schedule."""
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        data_length = 5  # generate 5 displacement maps, using 6 RGB images

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                new_length=data_length)
    model = model.to(device)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    if device.type == 'cuda':
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # BUG FIX: the message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg"
        if args.modality in ["RGB", "RGBDiff", "CV"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg"
        if args.modality in ["RGB", "RGBDiff", "CV"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.lr_steps,
                                                     gamma=0.1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(0, args.epochs):
        if epoch < args.start_epoch:
            # Fast-forward the LR schedule for epochs already completed
            # before a resume.
            scheduler.step()
            continue

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

        # BUG FIX: scheduler.step() used to run at the *top* of each epoch,
        # before any optimizer.step(), which decays the LR one epoch early
        # and triggers the PyTorch >=1.1 step-ordering warning. Step the
        # scheduler after the epoch's optimization instead.
        scheduler.step()
    writer.close()
[GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) data_loader = torch.utils.data.DataLoader( TSNDataSet( args.sources, args.test_list, timesteps=args.timesteps, #test_segments=args.test_segments, #sampling_method=args.sampling_method, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl="image_{:05d}.jpg" if args.modality in ['RGB', 'RGBDiff'] else args.flow_prefix + "{}_{:05d}.jpg", random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(256)), GroupCenterCrop(224), Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)]
def main():
    """Train a TSN model while live-plotting train/val loss and accuracy
    with matplotlib, persisting the curve history as .npy files under
    args.snapshot_pref so an interrupted run can resume its plots."""
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'myDataset':
        num_class = 12
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    f, axs = plt.subplots(4, 1, figsize=(10, 5))
    if args.start_epoch == 0:
        train_acc = []
        train_loss = []
        val_acc = []
        val_loss = []
        epochs = []
        val_epochs = []
    else:
        # Resuming: reload the curve history saved at the end of each epoch.
        train_acc = np.load("./%s/train_acc.npy" % args.snapshot_pref).tolist()
        train_loss = np.load("./%s/train_loss.npy" % args.snapshot_pref).tolist()
        val_acc = np.load("./%s/val_acc.npy" % args.snapshot_pref).tolist()
        val_loss = np.load("./%s/val_loss.npy" % args.snapshot_pref).tolist()
        epochs = np.load("./%s/epochs.npy" % args.snapshot_pref).tolist()
        val_epochs = np.load("./%s/val_epochs.npy" % args.snapshot_pref).tolist()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch)
        train_acc.append(acc)
        train_loss.append(loss)
        epochs.append(epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, v_loss = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            val_acc.append(prec1)
            val_loss.append(v_loss)
            val_epochs.append(epoch)

        # Redraw all four curves and persist the figure + raw histories.
        axs[0].plot(val_epochs, val_loss, c='b', marker='.', label='val_loss')
        axs[1].plot(val_epochs, val_acc, c='r', marker='.', label='val_acc')
        axs[2].plot(epochs, train_loss, c='b', marker='.', label='train_loss')
        axs[3].plot(epochs, train_acc, c='r', marker='.', label='train_acc')
        plt.title('TSN_' + args.snapshot_pref)
        if epoch == 0:
            # Add legends once, on the first epoch only.
            for i in range(4):
                axs[i].legend(loc='best')
        plt.pause(0.000001)
        if not os.path.exists(args.snapshot_pref):
            os.makedirs(args.snapshot_pref)
        plt.savefig('./%s/%s.jpg' % (args.snapshot_pref, str(epoch).zfill(5)))
        np.save("./%s/train_acc.npy" % args.snapshot_pref, train_acc)
        np.save("./%s/train_loss.npy" % args.snapshot_pref, train_loss)
        np.save("./%s/val_acc.npy" % args.snapshot_pref, val_acc)
        np.save("./%s/val_loss.npy" % args.snapshot_pref, val_loss)
        np.save("./%s/val_epochs.npy" % args.snapshot_pref, val_epochs)
        np.save("./%s/epochs.npy" % args.snapshot_pref, epochs)
def main():
    """Entry point for testing: build a TSN model, restore a checkpoint,
    and run it over the test list.

    Raises:
        ValueError: for an unknown dataset or modality.
    """
    parser = options()
    args = parser.parse_args()

    # Map dataset name to its number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'saag01':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frames consumed per segment for each modality.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously fell through and left `data_length` undefined,
        # causing a NameError further down for any other modality.
        raise ValueError('Unknown modality ' + args.modality)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Restore weights; the checkpoint was saved from a DataParallel model,
    # so wrap before loading the state dict.
    checkpoint = torch.load(args.checkpoint)
    state_dict = checkpoint['state_dict']
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    test_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.test_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.img_prefix + "_{:05d}" + args.ext
        if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "_{}_{:05d}" + args.ext,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(input_mean, input_std),
        ]),
        custom_prefix=args.custom_prefix),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True)

    ### Test ###
    test(model, test_loader, args)
def main(args, config):
    """Build a 26-class TSN model plus train/val loaders and launch the Trainer.

    Args:
        args: parsed command-line namespace (modality, arch, lr, ...).
        config: project config object providing logger, loss/metric names,
            and the lr-scheduler factory.

    Raises:
        ValueError: for a modality other than 'RGB' or 'Flow'.
    """
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality == 'Flow':
        data_length = 5
    else:
        # Fix: previously fell through and left `data_length` undefined,
        # raising a NameError below for unsupported modalities.
        raise ValueError('Unknown modality ' + args.modality)

    model = TSN(26, args.num_segments, args.modality,
                base_model=args.arch,
                new_length=data_length,
                embed=args.embed,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                context=args.context)

    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    normalize = GroupNormalize(input_mean, input_std)

    dataset = TSNDataSet(
        "train",
        num_segments=args.num_segments,
        context=args.context,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            GroupScale((224, 224)),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               drop_last=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "val",
            num_segments=args.num_segments,
            context=args.context,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale((224, 224)),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    logger = config.get_logger('train')
    logger.info(model)

    # Resolve loss and metric callables by name from the config.
    criterion_categorical = getattr(module_loss, config['loss'])
    criterion_continuous = getattr(module_loss, config['loss_continuous'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]
    metrics_continuous = [
        getattr(module_metric, met) for met in config['metrics_continuous']
    ]

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)
    for param_group in optimizer.param_groups:
        print(param_group['lr'])

    trainer = Trainer(model, criterion_categorical, criterion_continuous,
                      metrics, metrics_continuous, optimizer,
                      config=config,
                      data_loader=train_loader,
                      valid_data_loader=val_loader,
                      lr_scheduler=lr_scheduler,
                      embed=args.embed)
    trainer.train()
def main():
    """Train a TSN model: build the network, optionally resume a checkpoint,
    construct the data loaders, then run the train/validate loop.

    Uses the module-level `args` and `best_prec1` globals.
    """
    torch.set_printoptions(precision=6)
    global args, best_prec1
    args = parser.parse_args()

    # Number of classes per dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # TSN arguments: num_segments is K from the paper (snippets per video),
    # modality selects RGB / Flow input, arch picks the backbone
    # (e.g. resnet101, BNInception), consensus_type fuses snippet scores.
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Multi-GPU wrapper; checkpoints below are saved/loaded in this form.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # args.resume is either None or a path to a saved .pth checkpoint;
    # when set, restore epoch counter, best accuracy and weights.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading. RGBDiff already encodes differences, so it skips
    # mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously left `data_length` undefined for other modalities.
        raise ValueError('Unknown modality ' + args.modality)

    # TSNDataSet yields (tensor, int label); DataLoader batches them.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=3,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=3,
        pin_memory=True)

    # Loss function and optimizer.
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # Adam is used here instead of the original SGD setup.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # Decay the learning rate by 0.1 at the epochs listed in lr_steps.
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # Train for one epoch.
        train(train_loader, model, criterion, optimizer, epoch)

        # Validate and checkpoint every eval_freq epochs and at the end.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # Remember best prec@1 and save checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train a SeqVLAD model, optionally in two steps (sub-policies first).

    Uses the module-level `args` and `best_prec1` globals.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = SeqVLAD(num_class, args.num_centers, args.modality,
                    args.timesteps, args.redu_dim,
                    with_relu=args.with_relu,
                    base_model=args.arch,
                    activation=args.activation,
                    seqvlad_type=args.seqvlad_type,
                    init_method=args.init_method,
                    consensus_type=args.consensus_type,
                    dropout=args.dropout,
                    partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    if args.two_steps is not None:
        # Two-step training: optimize only a parameter subset at first.
        print('two step training ')
        sub_policies = model.get_sub_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model_dict = model.state_dict()
            if args.resume_type == 'tsn':
                # Warm-start from a TSN checkpoint: exclude its classifier
                # and global-pool weights, and restart at epoch 0.
                args.start_epoch = 0
                pretrained_dict = checkpoint['state_dict']
                excluded_modules = [
                    'module.new_fc', 'module.base_model.global_pool'
                ]
                res_state_dict = filter_excluded_module(
                    pretrained_dict, excluded_modules)
                model_dict.update(res_state_dict)
            elif args.resume_type == 'same':
                if args.two_steps == 0:
                    args.start_epoch = 0
                pretrained_dict = checkpoint['state_dict']
                res_state_dict = init_from_tsn_model(model_dict,
                                                     pretrained_dict)
                model_dict.update(res_state_dict)
            else:
                print('==> resume_type must be one of same/tsn')
                exit()
            model.load_state_dict(model_dict)
            # Fix: report the resumed checkpoint path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code. RGBDiff normalizes itself, so it skips
    # mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously left `data_length` undefined for other modalities.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.sources,
        args.train_list,
        timesteps=args.timesteps,
        new_length=data_length,
        modality=args.modality,
        sampling_method=args.sampling_method,
        reverse=args.reverse,
        image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.sources,
        args.val_list,
        timesteps=args.timesteps,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    if args.two_steps is not None:
        for group in sub_policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

    if args.optim == 'SGD':
        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_cent = torch.optim.SGD(policies,
                                         args.lr,
                                         momentum=args.momentum,
                                         weight_decay=args.weight_decay)
        if args.two_steps is not None:
            sub_optimizer = torch.optim.SGD(sub_policies,
                                            args.lr,
                                            momentum=args.momentum,
                                            weight_decay=args.weight_decay)
    elif args.optim == 'Adam':
        print('use Adam optimizer ... ...')
        optimizer = torch.optim.Adam(policies,
                                     args.lr,
                                     weight_decay=args.weight_decay)
        # Fix: optimizer_cent was only created in the SGD branch, so the
        # train() calls below crashed with a NameError when Adam was chosen.
        optimizer_cent = torch.optim.Adam(policies,
                                          args.lr,
                                          weight_decay=args.weight_decay)
        if args.two_steps is not None:
            sub_optimizer = torch.optim.Adam(sub_policies,
                                             args.lr,
                                             weight_decay=args.weight_decay)
    else:
        print('optimzer: {} is not implimented, please use SGD or Adam'.format(
            args.optim))
        exit()

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.two_steps is not None and epoch < args.two_steps:
            # Step one: only train the sub-policy parameters.
            adjust_learning_rate(sub_optimizer, epoch, args.lr_steps)
            train(train_loader, model, criterion, sub_optimizer,
                  optimizer_cent, epoch)
        else:
            adjust_learning_rate(optimizer, epoch, args.lr_steps)
            train(train_loader, model, criterion, optimizer, optimizer_cent,
                  epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
elif args.test_crops == 10: cropping = torchvision.transforms.Compose( [GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) data_loader = torch.utils.data.DataLoader( TSNDataSet("", args.test_list, num_segments=args.test_segments, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl="img_{:05d}.jpg" if args.modality in ['RGB', 'RGBDiff'] else args.flow_prefix + "{}_{:05d}.jpg", test_mode=True, transform=torchvision.transforms.Compose([ cropping, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers))
def main_charades():
    """Set up Charades multi-label training and hand off to the MAP engine.

    Builds the train/val datasets, the GCN-I3D model, the multi-label loss,
    the Adam optimizer and the engine state, then starts learning.
    """
    global args, best_prec1, use_gpu
    use_gpu = torch.cuda.is_available()

    (categories, args.train_list, args.val_list, args.train_num_list,
     args.val_num_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality,
                                             args.root_path)
    num_class = len(categories)

    crop_size = args.crop_size
    scale_size = args.scale_size
    # ImageNet channel statistics.
    input_mean = [0.485, 0.456, 0.406]
    input_std = [0.229, 0.224, 0.225]

    train_transform = torchvision.transforms.Compose([
        GroupMultiScaleCrop(crop_size, [1.0, 0.875, 0.75, 0.66, 0.5],
                            max_distort=2),
        GroupRandomHorizontalFlip(is_flow=False),
        Stack(roll=False),
        ToTorchFormatTensor(div=True),
        GroupNormalize(input_mean, input_std),
        ChangeToCTHW(modality=args.modality)
    ])
    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=False),
        ToTorchFormatTensor(div=True),
        GroupNormalize(input_mean, input_std),
        ChangeToCTHW(modality=args.modality)
    ])

    train_dataset = TSNDataSet(args.root_path,
                               args.train_list,
                               args.train_num_list,
                               num_class=num_class,
                               num_segments=args.num_segments,
                               new_length=args.data_length,
                               modality=args.modality,
                               image_tmpl=prefix,
                               transform=train_transform)
    val_dataset = TSNDataSet(args.root_path,
                             args.val_list,
                             args.val_num_list,
                             num_class=num_class,
                             num_segments=args.num_segments,
                             new_length=args.data_length,
                             modality=args.modality,
                             image_tmpl=prefix,
                             random_shift=False,
                             transform=val_transform)

    model = modelfile.gcn_i3d(
        num_class=num_class,
        t=0.4,
        adj_file=
        './data/Charades_v1/gcn_info/class_graph_conceptnet_context_0.8.pkl',
        word_file='./data/Charades_v1/gcn_info/class_word.pkl')

    # Multi-label loss and Adam over the model's configured parameter groups.
    criterion = nn.MultiLabelSoftMarginLoss()
    params = get_config_optim(model,
                              lr=args.lr,
                              weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(params, eps=1e-8)

    # Engine configuration consumed by GCNMultiLabelMAPEngine.
    state = {
        'batch_size': args.batch_size,
        'val_batch_size': args.val_batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'num_classes': num_class,
        'difficult_examples': False,
        'print_freq': args.print_freq,
        'save_model_path': args.save_model_path,
        'log_path': args.log_path,
        'logname': args.logname,
        'workers': args.workers,
        'epoch_step': args.epoch_step,
        'lr': args.lr,
        'device_ids': list(range(torch.cuda.device_count())),
    }
    if args.evaluate:
        state['evaluate'] = True

    mapengine = engine.GCNMultiLabelMAPEngine(
        state, inp_file='./data/Charades_v1/gcn_info/class_word.pkl')
    mapengine.learning(model, criterion, train_dataset, val_dataset, optimizer)
if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 elif args.modality == 'RGBFlow': data_length = args.num_motion train_loader = torch.utils.data.DataLoader(TSNDataSet( args.root_path, args.train_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=prefix, dataset=args.dataset, transform=torchvision.transforms.Compose([ train_augmentation, Stack(roll=(args.arch in ['BNInception', 'InceptionV3']), isRGBFlow=(args.modality == 'RGBFlow')), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), normalize, ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=False) val_loader = torch.utils.data.DataLoader(TSNDataSet( args.root_path, args.val_list,
def main():
    """Train an STSNN model end to end: data loaders, model, optimizer,
    train/validate loop with checkpointing and CSV logging.

    Uses the module-level `args` and `best_prec1` globals.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'STSNN', args.dataset, args.modality, args.arch,
        'group%d' % args.num_segments,
        '%df1c' % args.num_motion
    ])
    print('storing name: ' + args.store_name)

    model = STSNN(num_class, args.num_segments, args.modality,
                  base_model=args.arch,
                  consensus_type=args.consensus_type,
                  dropout=args.dropout,
                  num_motion=args.num_motion,
                  img_feature_dim=args.img_feature_dim,
                  partial_bn=not args.no_partialbn,
                  dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the resumed checkpoint path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    print(model)
    cudnn.benchmark = True

    # Data loading code.
    # Fix: the original test `(a != 'RGBDiff') | (a != 'RGBFlow')` is a
    # tautology (always True), so IdentityTransform was unreachable.
    # RGBDiff/RGBFlow inputs are meant to skip mean/std normalization.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # Fix: previously left `data_length` undefined for other modalities.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=(args.modality == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=False)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        dataset=args.dataset,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=(args.modality == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    try:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
    finally:
        # Fix: the CSV log handle was previously never closed.
        log_training.close()
[GroupScale(net.scale_size), GroupCenterCrop(net.input_size)]) elif args.test_crops == 10: cropping = transforms.Compose( [GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) data_loader = torch.utils.data.DataLoader(TSNDataSet( args.data_path, args.mode, num_segments=args.test_segments, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, transform=transforms.Compose([ cropping, Stack(roll=args.arch == 'BNInception'), ToTorchFormatTensor(div=args.arch != 'BNInception'), GroupNormalize(net.input_mean, net.input_std) ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers)) print(devices)
def eval_one_model(num_class, modality, weights, devices, args):
    """Evaluate one TSN checkpoint over the whole test list.

    Args:
        num_class: number of action classes.
        modality: input modality for this model (e.g. 'RGB', 'tvl1').
        weights: path to the checkpoint file to load.
        devices: GPU ids; the first receives the model, all run DataParallel.
        args: parsed command-line namespace.

    Returns:
        (output, video_labels, class_acc_map) where output holds per-video
        [index, class scores, label] entries from eval_video.
    """
    # init model
    net = TSN(num_class, 1, modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout,
              mdl=args.mdl,
              pretrained=False)

    # load checkpoint
    checkpoint = torch.load(weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))
    net.load_state_dict(checkpoint['state_dict'])

    # transformer
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    # Frame-file naming differs per dataset and modality.
    if args.dataset == 'ucf101':
        naming_pattern = "frame{:06d}.jpg" if modality in [
            "RGB", "RGBDiff", 'tvl1'
        ] else args.flow_prefix + "{}_{:06d}.jpg"
    else:
        naming_pattern = "image_{:05d}.jpg" if modality in [
            "RGB", "RGBDiff"
        ] else args.flow_prefix + "{}_{:05d}.jpg"

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        os.path.join(args.data_root_path,
                     ('jpegs_256' if modality == 'RGB' else 'tvl1_flow')),
        args.test_list,
        num_segments=args.test_segments,
        new_length=4 if modality == "RGB" else 6,
        modality=modality,
        image_tmpl=naming_pattern,
        test_mode=True,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers * 2,
        pin_memory=True)

    output = []  # [class probability, label code]

    # Inferencing
    net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net.eval()
    # Fix: iterate the loader directly instead of pairing tqdm(range(...))
    # with manual next() calls; the old `if i >= max_num: break` guard was
    # unreachable and the unused `total_num` local is removed.
    for i, (data, label) in enumerate(tqdm(data_loader)):
        output.append(
            eval_video(net, (i, data, label), num_class, modality, args))

    video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output]
    video_labels = [x[2] for x in output]

    # summarize results
    cf = confusion_matrix(video_labels, video_pred).astype(float)
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    # NOTE(review): divides by zero for classes absent from the test set —
    # same as the original behaviour; confirm the test list covers all classes.
    cls_acc = cls_hit / cls_cnt
    print('Accuracy of {}, {:.02f}%'.format(modality, np.mean(cls_acc) * 100))

    # Free GPU memory before the next model is evaluated.
    del net
    del data_loader
    class_acc_map = class_acc_mapping(cls_acc, args.dataset)
    return output, video_labels, class_acc_map