def get(cls, args):
    """Build the three dataset splits from parsed command-line args.

    Returns a ``(train, val, val_video)`` tuple of ``cls`` instances.
    Train gets random-crop + horizontal-flip augmentation; val gets a
    fixed center crop; val_video is constructed without any transform.
    """
    train_transform = transforms.Compose([
        videotransforms.RandomCrop(args.input_size),
        videotransforms.RandomHorizontalFlip(),
    ])
    # NOTE(review): val crop size is hard-coded to 256 while train uses
    # args.input_size — presumably intentional; confirm with dataset docs.
    val_transform = transforms.Compose([videotransforms.CenterCrop(256)])

    train_dataset = cls(args, args.data, 'train', args.train_file,
                        args.cache, transform=train_transform,
                        input_size=args.input_size)
    val_dataset = cls(args, args.data, 'val', args.val_file,
                      args.cache, transform=val_transform,
                      input_size=args.input_size)
    # val_video deliberately receives no transform (whole-frame evaluation).
    valvideo_dataset = cls(args, args.data, 'val_video', args.val_file,
                           args.cache, input_size=args.input_size)
    return train_dataset, val_dataset, valvideo_dataset
def get(cls, args, splits=('train', 'val', 'val_video')):
    """Build only the requested dataset splits; the rest are ``None``.

    Returns a ``(train, val, val_video)`` tuple. Train gets random-crop +
    horizontal-flip augmentation; both validation splits get a center crop
    at ``args.input_size``.
    """
    size = args.input_size

    def _build(split_name, data_file, transform):
        # Every split shares the same constructor signature.
        return cls(args, args.data, split_name, data_file, args.cache,
                   transform=transform, input_size=size)

    train_dataset = None
    val_dataset = None
    valvideo_dataset = None

    if 'train' in splits:
        train_aug = transforms.Compose([
            videotransforms.RandomCrop(size),
            videotransforms.RandomHorizontalFlip(),
        ])
        train_dataset = _build('train', args.train_file, train_aug)
    if 'val' in splits:
        eval_aug = transforms.Compose([videotransforms.CenterCrop(size)])
        val_dataset = _build('val', args.val_file, eval_aug)
    if 'val_video' in splits:
        eval_aug = transforms.Compose([videotransforms.CenterCrop(size)])
        valvideo_dataset = _build('val_video', args.val_file, eval_aug)

    return train_dataset, val_dataset, valvideo_dataset
def main():
    """Fine-tune a pretrained 3D backbone (C3D / R3D / R(2+1)D) for
    action classification on UCF-101 or HMDB-51, tracking best-accuracy
    and best-loss checkpoints.
    """
    args = parse_args()
    # torch.backends.cudnn.benchmark = True
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    pretrain_path = pretrain_path_list[args.pre_path]
    # Output directory encodes pretrain run, epoch, dataset, split, and time
    # (path components are parsed out of the pretrain checkpoint path).
    save_path = params['save_path_base'] + "ft3_classify_{}_{}_".format(
        pretrain_path.split('/')[-3][14:],
        args.exp_name) + params['data'] + '_split{}'.format(args.split)
    sub_dir = 'pt-{}-e{}-ft-{}'.format(
        pretrain_path.split('/')[-2],
        pretrain_path.split('/')[-1].split('.')[0].split('_')[-1],
        time.strftime('%m-%d-%H-%M'))
    model_save_dir = os.path.join(save_path, sub_dir)
    writer = SummaryWriter(model_save_dir)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    log_file = os.path.join(model_save_dir, 'log.txt')
    # All subsequent print() output is tee'd into log.txt via this redirect.
    sys.stdout = Logger(log_file)
    print(vars(args))

    # Class count follows the dataset; NameError if params['data'] is neither.
    if params['data'] == 'UCF-101':
        class_num = 101
    elif params['data'] == 'HMDB-51':
        class_num = 51
    print('{}: {}'.format(params['data'], class_num))

    # Select backbone; model is undefined for any other args.model_name.
    if args.model_name == 'c3d':
        model = c3d.C3D(with_classifier=True, num_classes=class_num)
    elif args.model_name == 'r3d':
        model = r3d.R3DNet((1, 1, 1, 1), with_classifier=True,
                           num_classes=class_num)
    elif args.model_name == 'r21d':
        model = r21d.R2Plus1DNet((1, 1, 1, 1), with_classifier=True,
                                 num_classes=class_num)
    print('Backbone:{}'.format(args.model_name))
    start_epoch = 1
    pretrain_path = pretrain_path_list[args.pre_path]
    print('Load model:' + pretrain_path)
    pretrain_weight = load_pretrained_weights(pretrain_path)
    print(pretrain_weight.keys())
    # strict=False: the classifier head is new, so unmatched keys are allowed.
    model.load_state_dict(pretrain_weight, strict=False)

    # train
    image_augmentation = None
    video_augmentation = transforms.Compose([
        video_transforms.ToPILImage(),
        video_transforms.Resize((128, 171)),
        video_transforms.RandomCrop(112),
        video_transforms.ToTensor()
    ])
    train_dataset = ClassifyDataSet(params['dataset'], mode="train",
                                    split=args.split, dataset=params['data'],
                                    video_transforms=video_augmentation,
                                    image_transforms=image_augmentation)
    # Fixed-size validation subset carved off the training set.
    if params['data'] == 'UCF-101':
        val_size = 800
    elif params['data'] == 'HMDB-51':
        val_size = 400
    train_dataset, val_dataset = random_split(
        train_dataset, (len(train_dataset) - val_size, val_size))
    print("num_works:{:d}".format(params['num_workers']))
    print("batch_size:{:d}".format(params['batch_size']))
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True,
                              num_workers=params['num_workers'])
    val_loader = DataLoader(val_dataset,
                            batch_size=params['batch_size'],
                            shuffle=True,
                            num_workers=params['num_workers'])
    if multi_gpu == 1:
        model = nn.DataParallel(model)
    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=params['learning_rate'],
                          momentum=params['momentum'],
                          weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
    # for data in train_loader:
    #     clip , label = data;
    #     writer.add_video('train/clips',clip,0,fps=8)
    #     writer.add_text('train/idx',str(label.tolist()),0)
    #     clip = clip.cuda()
    #     writer.add_graph(model,(clip,clip));
    #     break
    # for name,param in model.named_parameters():
    #     writer.add_histogram('params/{}'.format(name),param,0);
    prev_best_val_loss = float('inf')
    prev_best_loss_model_path = None
    prev_best_acc_model_path = None
    best_acc = 0
    best_epoch = 0
    for epoch in tqdm(range(start_epoch, start_epoch + params['epoch_num'])):
        # NOTE(review): scheduler.step() before optimizer steps is the
        # pre-PyTorch-1.1 ordering; kept as-is to preserve the LR schedule.
        scheduler.step()
        train(train_loader, model, criterion, optimizer, epoch, writer)
        val_loss, top1_avg = validation(val_loader, model, criterion,
                                        optimizer, epoch)
        # Snapshot whenever top-1 accuracy ties or beats the best so far.
        if top1_avg >= best_acc:
            best_acc = top1_avg
            print("i am best :", best_acc)
            best_epoch = epoch
            model_path = os.path.join(
                model_save_dir, 'best_acc_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            # if prev_best_acc_model_path:
            #     os.remove(prev_best_acc_model_path)
            # prev_best_acc_model_path = model_path
        # Separate snapshot tracking the lowest validation loss.
        if val_loss < prev_best_val_loss:
            model_path = os.path.join(
                model_save_dir, 'best_loss_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            prev_best_val_loss = val_loss
            # if prev_best_loss_model_path:
            #     os.remove(prev_best_loss_model_path)
            # prev_best_loss_model_path = model_path
        # scheduler.step(val_loss);
        # Periodic unconditional checkpoint every 20 epochs.
        if epoch % 20 == 0:
            checkpoints = os.path.join(model_save_dir,
                                       str(epoch) + ".pth.tar")
            torch.save(model.state_dict(), checkpoints)
            print("save_to:", checkpoints)
    print("best is :", best_acc, best_epoch)
def main():
    """Train the SSCN prediction model on top of a (optionally pretrained)
    3D backbone, with per-layer learning rates and plateau LR scheduling,
    keeping only the single best-loss checkpoint.
    """
    args = parse_args()
    torch.backends.cudnn.benchmark = True
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    save_path = params['save_path_base'] + "train_predict_{}_".format(
        args.exp_name) + params['data']
    model_save_dir = os.path.join(save_path, time.strftime('%m-%d-%H-%M'))
    writer = SummaryWriter(model_save_dir)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    log_file = os.path.join(model_save_dir, 'log.txt')
    # All subsequent print() output is tee'd into log.txt via this redirect.
    sys.stdout = Logger(log_file)
    print(vars(args))

    # Backbone selection (headless "_Hed" variants); model is undefined
    # for any other args.model_name.
    if args.model_name == 'c3d':
        print(args.model_name)
        model = c3d.C3D_Hed(with_classifier=False)
    elif args.model_name == 'r3d':
        print(args.model_name)
        model = r3d.R3DNet_Hed((1, 1, 1, 1), with_classifier=False)
    elif args.model_name == 'r21d':
        print(args.model_name)
        model = r21d.R2Plus1DNet_Hed((1, 1, 1, 1), with_classifier=False)
    # Wrap the backbone in the SSCN head; encoder heads come from
    # an underscore-separated CLI string (args.enc_head).
    model = sscn.SSCN_OneClip(args.model_name,
                              base_network=model,
                              with_classifier=True,
                              num_classes=4,
                              with_ClsEncoder=args.enc_head.split('_'))
    print(model)
    # ckpt is a module-level path; falsy means train from scratch.
    if ckpt:
        weight = load_pretrained_weights(ckpt)
        model.load_state_dict(weight, strict=False)

    # train
    image_augmentation = None
    video_augmentation = transforms.Compose([
        video_transforms.ToPILImage(),
        video_transforms.Resize((128, 171)),
        video_transforms.RandomCrop(112),
        video_transforms.ToTensor()
    ])
    train_dataset = PredictDataset(params['dataset'],
                                   mode="train",
                                   dataset=params['data'],
                                   video_transforms=video_augmentation,
                                   image_transforms=image_augmentation,
                                   args=args)
    # kinetics-400 ships its own val split; the smaller datasets carve a
    # fixed-size validation subset off the training set instead.
    if params['data'] == 'kinetics-400':
        val_dataset = PredictDataset(params['dataset'],
                                     mode='val',
                                     dataset=params['data'],
                                     video_transforms=video_augmentation,
                                     image_transforms=image_augmentation,
                                     args=args)
    elif params['data'] == 'UCF-101':
        val_size = 800
        train_dataset, val_dataset = random_split(
            train_dataset, (len(train_dataset) - val_size, val_size))
    elif params['data'] == 'hmdb':
        val_size = 400
        train_dataset, val_dataset = random_split(
            train_dataset, (len(train_dataset) - val_size, val_size))
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True,
                              num_workers=params['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=params['batch_size'],
                            shuffle=True,
                            num_workers=params['num_workers'],
                            drop_last=True)
    if multi_gpu == 1:
        model = nn.DataParallel(model)
    model = model.cuda()
    criterion_CE = nn.CrossEntropyLoss().cuda()
    criterion_MSE = Motion_MSEloss_NFGT().cuda()
    # Per-parameter LR groups: 'fc8' layers train at 10x the base rate.
    model_params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'fc8' in key:
                print(key)
                model_params += [{'params': [value],
                                  'lr': 10 * learning_rate}]
            else:
                model_params += [{'params': [value], 'lr': learning_rate}]
    # No global lr here: each param group above carries its own.
    optimizer = optim.SGD(model_params,
                          momentum=params['momentum'],
                          weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                     min_lr=1e-7,
                                                     patience=50,
                                                     factor=0.1)
    prev_best_val_loss = 100
    prev_best_loss_model_path = None
    for epoch in tqdm(range(start_epoch, start_epoch + args.epochs)):
        train(train_loader, model, criterion_MSE, criterion_CE, optimizer,
              epoch, writer, args=args)
        val_loss = validation(val_loader, model, criterion_MSE, criterion_CE,
                              optimizer, epoch, args=args)
        # Keep only the single best-loss checkpoint: save the new one,
        # then delete the previous best.
        if val_loss < prev_best_val_loss:
            model_path = os.path.join(model_save_dir,
                                      'best_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            prev_best_val_loss = val_loss
            if prev_best_loss_model_path:
                os.remove(prev_best_loss_model_path)
            prev_best_loss_model_path = model_path
        # ReduceLROnPlateau steps on the monitored validation loss.
        scheduler.step(val_loss)
        # Periodic unconditional checkpoint every 20 epochs.
        if epoch % 20 == 0:
            checkpoints = os.path.join(model_save_dir,
                                       'model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), checkpoints)
            print("save_to:", checkpoints)