def load_models(opt):
    """Build the classifier described by ``opt`` and load its checkpoint.

    Mutates ``opt`` in place (path resolution, crop scales, arch label,
    mean/std) before constructing the network, then restores weights from
    ``opt.resume_path`` on the CPU when a resume path is given.
    """
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)

    # Geometric series of crop scales: each entry shrinks by scale_step.
    opt.scales = [opt.initial_scale]
    for _ in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)

    opt.arch = f'{opt.model}'
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)

    torch.manual_seed(opt.manual_seed)
    net, _ = generate_model(opt)

    if opt.resume_path:
        # print('loading checkpoint {}'.format(opt.resume_path))
        # Load on CPU; the caller decides where the model ultimately lives.
        state = torch.load(opt.resume_path, map_location=torch.device('cpu'))
        # assert opt.arch == state['arch']
        net.load_state_dict(state['state_dict'])
    return net
def get_3d_model(num_feature2d_slices=30):
    """Build a 3D CNN from CLI options.

    num_feature2d_slices: stored as ``opt.sample_duration`` -- the temporal
        clip length fed to the 3D network equals the number of 2D feature
        slices produced upstream.
    Returns the model from ``generate_model(opt)``.
    """
    opt = parse_opts()
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales: each entry is the previous * scale_step.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    opt.sample_duration = num_feature2d_slices
    # print(opt)
    # Persist the effective options next to the results for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    return model
def __init__(self, model_file, sample_duration, model_type, cuda_id=0):
    """Load a C3D/ResNet action-recognition model and its test-time transforms.

    model_file: checkpoint filename, resolved against the C3D data root.
    sample_duration: temporal clip length expected by the network.
    model_type: architecture name forwarded to generate_model.
    cuda_id: GPU index recorded on the options object.
    """
    self.opt = parse_opts()
    # Override parsed defaults with the hard-wired deployment configuration.
    self.opt.model = model_type
    self.opt.root_path = './C3D_ResNet/data'
    self.opt.resume_path = os.path.join(self.opt.root_path, model_file)
    self.opt.pretrain_path = os.path.join(self.opt.root_path, 'models/resnet-18-kinetics.pth')
    self.opt.cuda_id = cuda_id
    self.opt.dataset = 'ucf101'
    self.opt.n_classes = 400  # matches the Kinetics-pretrained head (see pretrain_path)
    self.opt.n_finetune_classes = 3  # target classes after fine-tuning
    self.opt.ft_begin_index = 4
    self.opt.model_depth = 18
    self.opt.resnet_shortcut = 'A'
    self.opt.sample_duration = sample_duration
    self.opt.batch_size = 1
    self.opt.n_threads = 1
    self.opt.checkpoint = 5
    self.opt.arch = '{}-{}'.format(self.opt.model, self.opt.model_depth)
    self.opt.mean = get_mean(self.opt.norm_value, dataset=self.opt.mean_dataset)
    self.opt.std = get_std(self.opt.norm_value)
    # print(self.opt)
    print('Loading C3D action-recognition model..')
    self.model, parameters = generate_model(self.opt)
    # print(self.model)
    # Choose normalisation: none / mean-only / mean+std.
    if self.opt.no_mean_norm and not self.opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not self.opt.std_norm:
        norm_method = Normalize(self.opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(self.opt.mean, self.opt.std)
    if self.opt.resume_path:
        print(' loading checkpoint {}'.format(self.opt.resume_path))
        checkpoint = torch.load(self.opt.resume_path)
        # assert self.opt.arch == checkpoint['arch']
        self.opt.begin_epoch = checkpoint['epoch']
        self.model.load_state_dict(checkpoint['state_dict'])
    # Test-time spatial pipeline: scale, corner-crop, to-tensor, normalise.
    self.spatial_transform = Compose([
        ScaleQC(int(self.opt.sample_size / self.opt.scale_in_test)),
        CornerCrop(self.opt.sample_size, self.opt.crop_position_in_test),
        ToTensor(self.opt.norm_value), norm_method
    ])
    self.target_transform = ClassLabel()
    self.model.eval()
def model_process(count, model):
    """Run the test phase for ``model`` with freshly parsed CLI options.

    count: forwarded to tester.test together with the loader and class names.
    model: the network to evaluate.
    """
    opt = parse_opts()
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    #opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    #print(opt)
    #print(opt.result_path)
    # Persist the effective options next to the results for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    #print(model)
    # NOTE(review): criterion is built (and moved to CUDA) but never used in
    # this function -- confirm whether it can be removed.
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Choose normalisation: none / mean-only / mean+std.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    print('testing is run')
    if opt.test:
        # Test-time pipeline: zoom by 1/scale_in_test, fixed corner crop.
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        tester.test(count, test_loader, model, opt, test_data.class_names)
def load_models(opt):
    """Build the classifier described by the ``*_clf`` options.

    Copies the classifier-specific settings onto the shared option fields,
    resolves paths, constructs the network, restores its checkpoint, and
    adapts the input layers for short clips / Depth input.
    Returns the classifier on CUDA.
    """
    # Promote classifier-specific options into the generic fields used below.
    opt.resume_path = opt.resume_path_clf
    opt.pretrain_path = opt.pretrain_path_clf
    opt.sample_duration = opt.sample_duration_clf
    opt.model = opt.model_clf
    opt.model_depth = opt.model_depth_clf
    opt.modality = opt.modality_clf
    opt.resnet_shortcut = opt.resnet_shortcut_clf
    opt.n_classes = opt.n_classes_clf
    opt.n_finetune_classes = opt.n_finetune_classes_clf
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)
    print(opt)
    # Persist the effective classifier options for reproducibility.
    with open(
            os.path.join(opt.result_path,
                         'opts_clf_{}.json'.format(opt.store_name)),
            'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    # Build the network as RGB first; depth-specific input layers are
    # grafted on below by _construct_depth_model, and the modality is
    # restored at the end of this function.
    if opt.modality == 'Depth':
        opt.modality = 'RGB'
    classifier, parameters = generate_model(opt)
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']
        classifier.load_state_dict(checkpoint['state_dict'])
    # Short clips (< 32 frames) need a modified first conv, except for c3d.
    # NOTE(review): the grouping of the three statements after this check is
    # ambiguous in the collapsed source -- confirm against the original repo
    # whether _construct_depth_model/.cuda() belong inside this branch.
    if opt.sample_duration_clf < 32 and opt.model_clf != 'c3d':
        classifier = _modify_first_conv_layer(classifier, 3, 3)
    classifier = _construct_depth_model(classifier)
    classifier = classifier.cuda()
    if not opt.modality == opt.modality_clf:
        # Restore the original modality after the RGB-build workaround.
        opt.modality = opt.modality_clf
    print('Model \n', classifier)
    pytorch_total_params = sum(p.numel() for p in classifier.parameters()
                               if p.requires_grad)
    print("Total number of trainable parameters: ", pytorch_total_params)
    return classifier
def load_models(opt):
    """Build the classifier from the ``*_clf`` options and load its weights.

    Copies the classifier-specific settings onto the shared option fields,
    resolves paths, constructs the network and restores its checkpoint on
    the CPU. Returns the classifier.
    """
    # Promote classifier-specific options into the generic fields used below.
    opt.resume_path = opt.resume_path_clf
    opt.pretrain_path = opt.pretrain_path_clf
    opt.sample_duration = opt.sample_duration_clf
    opt.model = opt.model_clf
    opt.model_depth = opt.model_depth_clf
    opt.width_mult = opt.width_mult_clf
    opt.modality = opt.modality_clf
    opt.resnet_shortcut = opt.resnet_shortcut_clf
    opt.n_classes = opt.n_classes_clf
    opt.n_finetune_classes = opt.n_finetune_classes_clf
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}'.format(opt.model)
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)
    print(opt)
    # Persist the effective classifier options for reproducibility.
    with open(os.path.join(opt.result_path, 'opts_clf.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    classifier, parameters = generate_model(opt)
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        # Load on CPU; the caller decides where the model ultimately lives.
        checkpoint = torch.load(opt.resume_path, map_location=torch.device('cpu'))
        # assert opt.arch == checkpoint['arch']
        classifier.load_state_dict(checkpoint['state_dict'])
    print('Model 2 \n', classifier)
    pytorch_total_params = sum(p.numel() for p in classifier.parameters()
                               if p.requires_grad)
    print("Total number of trainable parameters: ", pytorch_total_params)
    return classifier
def __init__(self, root_path, video_path, annotation_path, result_path, model_path, modality):
    """Configure and build an offline resnext-101 model for the 'ems' dataset.

    All paths are injected into the option parser as CLI-style arguments;
    the result/resume paths are used verbatim (their re-rooting joins are
    deliberately commented out below).
    """
    print('***checkpoint***')
    opt = parse_opts_offline([
        '--root_path', root_path, '--video_path', video_path,
        '--annotation_path', annotation_path, '--result_path', result_path,
        '--resume_path', model_path, '--dataset', 'ems',
        '--sample_duration', '32', '--model', 'resnext',
        '--model_depth', '101', '--resnet_shortcut', 'B',
        '--batch_size', '1', '--n_finetune_classes', '4',
        '--n_threads', '1', '--checkpoint', '1',
        '--modality', modality, '--n_val_samples', '1',
        '--test_subset', 'test'
    ])
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        # opt.result_path = os.path.join(opt.root_path, opt.result_path)
        # if opt.resume_path:
        #     opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)
    print(opt)
    #%%
    # Silence library warnings for the interactive session.
    warnings.filterwarnings('ignore')
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print("Total number of trainable parameters: ", pytorch_total_params)
    self.opt = opt
    self.model = model
    self.parameters = parameters
def extract_feats(file_path, net, filenames, frame_num, batch_size, save_path):
    """Extract 3D features (saved in .npy) for each video.

    file_path: directory containing the input videos.
    net: feature extractor; consumes 16-frame clips (x-8 .. x+8).
    filenames: video file names; only the [start_idx:end_idx] slice is
        processed (module-level globals -- TODO confirm they are defined).
    frame_num: number of clip centres sampled uniformly over the video.
    batch_size: clips per forward pass.
    save_path: output directory for the per-video .npy feature files.
    """
    net.eval()
    mean = get_mean(255, dataset='kinetics')
    std = get_std(255)
    transform = Compose([
        trn.ToPILImage(),
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(),
        Normalize(mean, std)
    ])
    print("Network loaded")
    # Read videos and extract features in batches.
    for file in filenames[start_idx:end_idx]:
        feat_file = os.path.join(save_path, file[:-4] + '.npy')
        if os.path.exists(feat_file):  # skip videos already processed
            continue
        vid = imageio.get_reader(os.path.join(file_path, file), 'ffmpeg')
        curr_frames = []
        for frame in vid:
            if len(frame.shape) < 3:
                # Grayscale frame: replicate it into 3 channels so the RGB
                # pipeline gets an HxWx3 array. (Fixed: np.repeat(frame, 3)
                # flattened the image instead of adding a channel axis.)
                frame = np.repeat(frame[:, :, np.newaxis], 3, axis=2)
            curr_frames.append(transform(frame).unsqueeze(0))
        curr_frames = torch.cat(curr_frames, dim=0)
        print("Shape of frames: {0}".format(curr_frames.shape))
        # Uniformly sample frame_num clip centres, clamped so every 16-frame
        # window [x-8, x+8) stays inside the video. (Fixed: centres < 8 made
        # curr_frames[x-8:x+8] an empty slice and crashed torch.cat.)
        idx = np.linspace(0, len(curr_frames) - 1, frame_num).astype(int)
        idx = np.clip(idx, 8, max(8, len(curr_frames) - 8))
        print("Captured {} clips: {}".format(len(idx), curr_frames.shape))
        curr_feats = []
        for i in range(0, len(idx), batch_size):
            curr_batch = [
                curr_frames[x - 8:x + 8, ...].unsqueeze(0)
                for x in idx[i:i + batch_size]
            ]
            curr_batch = torch.cat(curr_batch, dim=0).cuda()
            # Network expects (batch, channels, time, H, W) -> swap dims 1/2.
            out = net(curr_batch.transpose(1, 2).cuda())
            curr_feats.append(out.detach().cpu())
            print("Appended {} features {}".format(i + 1, out.shape))
        curr_feats = torch.cat(curr_feats, 0)
        del out
        #set_trace()
        np.save(feat_file, curr_feats.numpy())
        print("Saved file {}\nExiting".format(file[:-4] + '.npy'))
'prec1': top1.avg, 'prec5': top5.avg, }) return top1.avg, top5.avg, losses.avg if __name__ == '__main__': args = parse_args() if not os.path.exists(args.result_path): os.makedirs(args.result_path) args.scales = [args.initial_scale] for i in range(1, args.n_scales): args.scales.append(args.scales[-1] * args.scale_step) args.mean = get_mean(args.norm_value, dataset=args.mean_dataset) args.std = get_std(args.norm_value) print(args) with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(args), opt_file) torch.manual_seed(args.manual_seed) print('\n==> Building Model...') model, classifier = set_model(args) # print('\n==> Freeze backbone...') # for param in model.parameters(): # param.requires_grad = False # criterion = nn.CrossEntropyLoss().cuda()
if __name__ == '__main__':
    # os.environ['CUDA_VISIBLE_DEVICES'] = '4'
    opt = parse_opts()
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = opt.model
    # In this script mean/std are keyed on the model, not the dataset.
    opt.mean = get_mean(opt.norm_value, model=opt.model)
    opt.std = get_std(opt.norm_value, model=opt.model)
    print(opt)
    # Persist the effective options next to the results for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    setup_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Choose normalisation: none / mean-only / mean+std.
    # NOTE(review): this chunk appears truncated -- the final 'else' branch
    # of the normalisation selection is not visible here.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
def objective(trial):
    """Optuna objective: train and validate a 3D CNN, returning the final
    validation loss (lower is better).

    trial: an optuna Trial, or falsy to keep the CLI defaults. Samples
        weight_decay in [0.01, 0.1] and learning_rate in [1e-5, 1e-4].
    """
    opt = parse_opts()
    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        # Fixed: the bounds were written as ``1 - 5`` and ``1 - 4`` (which
        # evaluate to -4 and -3, i.e. negative learning rates); the intended
        # search range is 1e-5 .. 1e-4.
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)
    # Resolve configured paths relative to the root path, if one is set.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    # Persist the effective options next to the results for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Choose normalisation: none / mean-only / mean+std.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    # norm_method = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               verbose=True,
                                                               factor=0.1**0.5)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
    print('run')
    writer = SummaryWriter(
        comment=
        f"_wd{opt.weight_decay}_lr{opt.learning_rate}_ft_begin{opt.ft_begin_index}_pretrain{not opt.pretrain_path == ''}"
    )
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            epoch, losses_avg, accuracies_avg = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger)
            writer.add_scalar('loss/train', losses_avg, epoch)
            writer.add_scalar('acc/train', accuracies_avg, epoch)
        if not opt.no_val:
            epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            writer.add_scalar('loss/val', val_losses_avg, epoch)
            writer.add_scalar('acc/val', val_accuracies_avg, epoch)
        if not opt.no_train and not opt.no_val:
            # The plateau scheduler steps on the validation loss.
            scheduler.step(val_losses_avg)
        print('=' * 100)
    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)
    writer.close()
    return val_losses_avg
if __name__ == '__main__': opt = parse_opts() if opt.root_path != '': opt.video_path = os.path.join(opt.root_path, opt.video_path) opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) opt.result_path = os.path.join(opt.root_path, opt.result_path) if opt.resume_path: opt.resume_path = os.path.join(opt.root_path, opt.resume_path) if opt.pretrain_path: opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) if not os.path.exists(opt.result_path): os.makedirs(opt.result_path) opt.arch = '{}-{}'.format(opt.model, opt.model_depth) opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset) opt.std = get_std(opt.norm_value, dataset=opt.mean_dataset) print(opt) with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model, parameters = generate_model(opt) print(model) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda() if opt.no_mean_norm and not opt.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not opt.std_norm:
if opt_prune.root_path != '': opt_prune.result_path = os.path.join(opt_prune.root_path, opt_prune.result_path) if opt_prune.pretrain_path: opt_prune.pretrain_path = os.path.join( opt_prune.root_path, opt_prune.pretrain_path) #pretrained model path opt_prune.scales = [opt_prune.initial_scale] for i in range(1, opt_prune.n_scales): opt_prune.scales.append(opt_prune.scales[-1] * opt_prune.scale_step) #opt.arch = '{}-{}'.format(opt.model, opt.model_depth) opt_prune.arch = '{}'.format(opt_prune.model) opt_prune.mean = get_mean(opt_prune.norm_value, dataset=opt_prune.mean_dataset) opt_prune.std = get_std(opt_prune.norm_value) opt_prune.store_name = '_'.join([ opt_prune.dataset, opt_prune.model, opt_prune.modality, str(opt_prune.sample_duration) ]) print(opt_prune) torch.manual_seed(opt_prune.manual_seed) #model model model model model model model model model model, parameters = generate_model( opt_prune) #if opt_prune.pretrain_path , 预装模型初始化和加载 print(model) ''' opt_prune = parse_opts()
print('Generated') if opt.root_path != '': opt.video_path = os.path.join(opt.root_path, opt.video_path) opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) opt.result_path = os.path.join(opt.root_path, opt.result_path) if opt.resume_path: opt.resume_path = os.path.join(opt.root_path, opt.resume_path) if opt.pretrain_path: opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) opt.scales = [opt.initial_scale] for i in range(1, opt.n_scales): opt.scales.append(opt.scales[-1] * opt.scale_step) opt.arch = '{}-{}'.format(opt.model, opt.model_depth) opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset) opt.std = get_std(opt.norm_value, opt.dataset) print(opt) with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model, parameters = generate_model(opt) print(model) summary(model, input_size=(3, 64, 112, 112)) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda(device=opt.cuda_id) torch.cuda.device(opt.cuda_id) if opt.no_mean_norm and not opt.std_norm:
FLAGS.model + '.py') if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) os.system('cp %s %s ' % (__file__, DUMP_DIR)) # bkp of evaluation file os.system('cp %s %s ' % (COMMAND_FILE, DUMP_DIR)) # bkp of command shell file os.system('cp %s %s' % (MODEL_FILE, DUMP_DIR)) # bkp of model def os.system('cp utils/net_utils.py %s ' % (DUMP_DIR)) # bkp of net_utils file LOG_FOUT = open(os.path.join(DUMP_DIR, 'log_evaluate.txt'), 'w') LOG_FOUT.write(str(FLAGS) + '\n') NUM_CLASSES = FLAGS.num_classes HOSTNAME = socket.gethostname() # validation transform normalize = spatial_transforms.ToNormalizedTensor(mean=get_mean(), std=get_std()) if FCN == 0: val_transform = spatial_transforms.Compose([ spatial_transforms.Resize(FULL_SIZE), spatial_transforms.CenterCrop(WIDTH), normalize ]) elif FCN == 1: val_transform = spatial_transforms.Compose([ spatial_transforms.Resize(FULL_SIZE), spatial_transforms.CenterCrop(WIDTH), normalize ]) elif FCN == 3: val_transform = spatial_transforms.Compose( [spatial_transforms.Resize(FULL_SIZE), normalize]) elif FCN == 5: val_transform = spatial_transforms.Compose(
def create_3d_resnet(ema=False, num_classes=101):
    # NOTE(review): this chunk looks garbled -- the body below is a
    # script-style __main__ block fused onto the def line; presumably the
    # original function body was lost in extraction. Confirm against the
    # source repo before relying on this structure.
    if __name__ == '__main__':
        # Mean-teacher semi-supervised training loop (student + EMA teacher).
        args = opts.parse_opts()
        if not os.path.exists(args.result_path):
            os.makedirs(args.result_path)
        # for key in cfg.keys():
        #     print('{}: {}'.format(key, cfg[key]))
        # if not os.path.exists(os.path.join(args.result_path, 'config.py')):
        #     shutil.copyfile('./config.py', os.path.join(args.result_path, 'config.py'))
        # Geometric series of crop scales.
        # NOTE(review): 'scales_step' may be a typo for 'scale_step' --
        # confirm the attribute exists on the parsed options.
        args.scales = [args.initial_scale]
        for i in range(1, args.n_scales):
            args.scales.append(args.scales[-1] * args.scales_step)
        args.arch = 'resnet18'
        args.mean = get_mean(1, dataset='activitynet')
        args.std = get_std(args.norm_value)
        print(args)
        # Persist the effective arguments for reproducibility.
        with open(os.path.join(args.result_path, 'args.json'), 'w') as args_file:
            json.dump(vars(args), args_file)
        torch.manual_seed(args.manual_seed)
        # writer = SummaryWriter(log_dir='./results')
        train_batch_logger = Logger(
            os.path.join(args.result_path, args.pth_name + '_' + 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1', 'lr'])
        train_epoch_logger = Logger(
            os.path.join(args.result_path, args.pth_name + '_' + 'train_epoch.log'),
            ['epoch', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1'])
        val_logger = Logger(
            os.path.join(args.result_path, args.pth_name + '_' + 'val.log'),
            ['epoch', 'loss', 'prec1'])
        student_model = create_model().cuda()  # student
        ema_model = create_model(ema=True).cuda()  # teacher
        train_set, val_set, classes = prepare_cifar10(args.dataset_root)
        train_loader, val_loader = sample_train(train_set, val_set, len(classes), args)
        # classification error is ignored for unlabeled samples, but averaged
        # by whole batch, not just labeled samples
        class_criterion = nn.CrossEntropyLoss(ignore_index=args.NO_LABEL,
                                              reduction='sum').cuda()
        if args.consistency_type == 'mse':
            consistency_criterion = softmax_mse_loss
        elif args.consistency_type == 'kl':
            consistency_criterion = softmax_kl_loss
        else:
            consistency_criterion = None
            exit('wrong consistency type! Check config file!')
        criterion = {'classification': class_criterion,
                     'consistency': consistency_criterion}
        optimizer = torch.optim.SGD(student_model.parameters(), args.init_lr,
                                    momentum=0.9,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
        best_prec1 = 0
        for epoch in range(args.num_epochs):
            train_epoch(epoch, student_model, ema_model, train_loader,
                        optimizer, criterion, train_batch_logger,
                        train_epoch_logger, args)
            # Checkpoint after every epoch (never flagged best at this point).
            state = {'epoch': epoch,
                     'state_dict': student_model.state_dict(),
                     'ema_state_dict': ema_model.state_dict(),
                     'optimizer': optimizer.state_dict(),
                     'best_prec1': best_prec1}
            save_checkpoint(state, False, args.result_path, args.pth_name)
            validation_loss, prec1 = validate_epoch(epoch, student_model,
                                                    val_loader, criterion,
                                                    val_logger, args)
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            # Re-save with the updated best_prec1 and the is_best flag.
            state = {'epoch': epoch,
                     'state_dict': student_model.state_dict(),
                     'ema_state_dict': ema_model.state_dict(),
                     'best_prec1': best_prec1,
                     'optimizer': optimizer.state_dict()}
            save_checkpoint(state, is_best, args.result_path, args.pth_name)
def create_dataloader(args):
    """Build the training and validation DataLoaders from parsed options.

    Returns (train_loader, val_loader). Mutates ``args`` in place (path
    resolution, crop scales, mean/std).
    """
    # Resolve configured paths relative to the root path, if one is set.
    if args.root_path != '':
        args.video_path = os.path.join(args.root_path, args.video_path)
        args.annotation_path = os.path.join(args.root_path, args.annotation_path)
        args.result_path = os.path.join(args.root_path, args.result_path)
        if args.resume_path:
            args.resume_path = os.path.join(args.root_path, args.resume_path)
        if args.pretrain_path:
            # args.pretrain_path = os.path.join(args.root_path, args.pretrain_path)
            # Pretrain path is taken as-is (absolute), not re-rooted.
            args.pretrain_path = os.path.abspath(args.pretrain_path)
    # Geometric series of crop scales.
    args.scales = [args.initial_scale]
    for i in range(1, args.n_scales):
        args.scales.append(args.scales[-1] * args.scale_step)
    args.mean = get_mean(args.norm_value, dataset=args.mean_dataset)
    args.std = get_std(args.norm_value)
    # Choose normalisation: none / mean-only / mean+std.
    if args.no_mean_norm and not args.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not args.std_norm:
        norm_method = Normalize(args.mean, [1, 1, 1])
    else:
        norm_method = Normalize(args.mean, args.std)
    assert args.train_crop in ['random', 'corner', 'center']
    if args.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(args.scales, args.sample_size)
    elif args.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(args.scales, args.sample_size)
    elif args.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(args.scales, args.sample_size,
                                           crop_positions=['c'])
    # Training pipeline: random crop + flip.
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(args.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(args.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(args, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.n_threads,
                                               pin_memory=True)
    # Validation pipeline: deterministic test-style scale + corner crop.
    spatial_transform = Compose([
        # Scale(args.sample_size),
        Scale(int(args.sample_size / args.scale_in_test)),
        # CenterCrop(args.sample_size),
        CornerCrop(args.sample_size, args.crop_position_in_test),
        ToTensor(args.norm_value), norm_method
    ])
    temporal_transform = TemporalCenterCrop(args.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(args, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.n_threads,
                                             pin_memory=True)
    return train_loader, val_loader
def get_ucf_data(opt):
    """Create UCF101 train/val/test loaders with shared test-style transforms.

    All three splits use the same spatial/temporal pipeline and 16-frame
    clips; every loader iterates deterministically (no shuffling, no pinned
    memory). Returns (train_loader, val_loader, test_loader, test_data).
    """
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    def _dataset(subset, target_tf):
        # Every split shares the same clip length and transform pipeline.
        return UCF101(opt.video_path,
                      opt.annotation_path,
                      subset,
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_tf,
                      sample_duration=16)

    def _loader(dataset):
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=opt.batch_size,
                                           shuffle=False,
                                           num_workers=opt.n_threads,
                                           pin_memory=False)  # True

    # Train/val report class labels; the test split reports video IDs.
    class_target = ClassLabel()  # VideoID()
    training_data = _dataset('training', class_target)
    train_loader = _loader(training_data)

    val_data = _dataset('validation', class_target)
    val_loader = _loader(val_data)

    test_data = _dataset('testing', VideoID())
    test_loader = _loader(test_data)

    return train_loader, val_loader, test_loader, test_data
if __name__ == '__main__': opt = parse_opts() if opt.root_path != '': opt.video_path = os.path.join(opt.root_path, opt.video_path) opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) opt.result_path = os.path.join(opt.root_path, opt.result_path) if opt.resume_path: opt.resume_path = os.path.join(opt.root_path, opt.resume_path) if opt.pretrain_path: opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) opt.scales = [opt.initial_scale] for i in range(1, opt.n_scales): opt.scales.append(opt.scales[-1] * opt.scale_step) opt.arch = '{}-{}'.format(opt.model, opt.model_depth) opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset) opt.std = get_std(opt.norm_value) print(opt) with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model, parameters = generate_model(opt) print(model) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda() if opt.no_mean_norm and not opt.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not opt.std_norm:
def __init__(self):
    """Set up the action-recognition model, Grad-CAM explainers for every
    major layer, the spatial transforms, and per-run accumulators.

    NOTE(review): assumes a CUDA device is available (`self.model.cuda()`).
    """
    self.model_methods = [['resnext', 'gradcam', 'camshow']]
    # HMDB-51 class names, in label order.
    self.classes = [
        "brush_hair", "cartwheel", "catch", "chew", "clap", "climb",
        "climb_stairs", "dive", "draw_sword", "dribble", "drink", "eat",
        "fall_floor", "fencing", "flic_flac", "golf", "handstand", "hit",
        "hug", "jump", "kick", "kick_ball", "kiss", "laugh", "pick", "pour",
        "pullup", "punch", "push", "pushup", "ride_bike", "ride_horse",
        "run", "shake_hands", "shoot_ball", "shoot_bow", "shoot_gun", "sit",
        "situp", "smile", "smoke", "somersault", "stand", "swing_baseball",
        "sword", "sword_exercise", "talk", "throw", "turn", "walk", "wave"
    ]
    crop_scales = [1.0]  # single scale -> effectively a fixed corner crop

    # Normalized tensor pipeline used for model input.
    self.spatial_transform = Compose([
        MultiScaleCornerCrop(crop_scales, 112),
        ToTensor(1.0),
        Normalize(get_mean(1.0, dataset='activitynet'), get_std(1.0))
    ])
    # Crop-only pipeline (e.g. for visualization frames).
    self.spatial_transform2 = Compose([MultiScaleCornerCrop(crop_scales, 112)])
    # Tensor pipeline without normalization.
    self.spatial_transform3 = Compose([
        MultiScaleCornerCrop(crop_scales, 112),
        ToTensor(1),
        Normalize([0, 0, 0], [1, 1, 1])
    ])

    self.model = utils.load_model(self.model_methods[0][0])
    self.model.cuda()
    self.bb_frames = []

    # One Grad-CAM explainer per layer of interest:
    # explainer -> conv1, explainer2 -> layer1, ... explainer6 -> avgpool.
    method_name = 'gradcam'
    layer_names = ["conv1", "layer1", "layer2", "layer3", "layer4", "avgpool"]
    for pos, layer_name in enumerate(layer_names):
        attr = 'explainer' if pos == 0 else 'explainer{}'.format(pos + 1)
        setattr(self, attr, get_explainer(self.model, method_name, layer_name))

    path = "images/frames4"
    self.path = path + "/"

    # Per-run accumulators.
    self.seq = []
    self.kls = []
    self.scr = []
    for counter in ('totalhit', 'totalhit2', 'totalhit3', 'totalhit4',
                    'totalhit5', 'totalhit6', 'totalhit7'):
        setattr(self, counter, 0)
    self.totalframes = 0
# Script entry point (resnet-only variant): resolve paths, build model + loss.
# NOTE(review): this chunk is truncated — `if not opt.no_train:` has no body here.
if __name__ == '__main__':
    opt = parse_opts()
    # Make all configured paths absolute relative to root_path (when given).
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = 'resnet-{}'.format(opt.model_depth)
    opt.mean = get_mean()
    opt.std = get_std()
    print(opt, flush=True)
    # Persist resolved options for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    # This generate_model variant also returns architecture parameters.
    model, parameters, arch_parameters = generate_model(opt)
    print(model, flush=True)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    norm_method = Normalize(opt.mean, opt.std)
    if not opt.no_train:
def main(args):
    """End-to-end inference pipeline: convert videos to jpg frames, extract
    3D-CNN features (ResNeXt-101 + ResNet-50 pretrained on Kinetics), run a
    saved sklearn classifier head, and emit per-video plots and a JSON of
    per-timestep scores.

    args: argparse-style namespace with video_directory_path,
        pretrain_directory_path, and model (one of 'hw4','hw5','hw6','hw8','final').
    """
    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler
    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (
        Compose, Normalize, Scale, CenterCrop, CornerCrop, MultiScaleCornerCrop,
        MultiScaleRandomCrop, RandomHorizontalFlip, ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score

    # ---- Working directories (created under the current working directory) ----
    local_path = os.getcwd()
    if args.video_directory_path in ["", " ", '', './video', './video/']:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
    video_path_jpg = local_path + '/video_jpg/'
    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)
    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)
    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)

    # Decompose videos into jpg frames and count frames per clip.
    # NOTE(review): shells out via os.system with unquoted paths — paths with
    # spaces will break; consider subprocess.run with a list.
    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' + video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)

    if args.pretrain_directory_path in ["", " ", '', './pretrain', './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path

    # Minimal option namespace consumed by the dataset/model constructors.
    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2,
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path = local_path
    opt.video_path = video_path_jpg

    # use two gpu devices on the server, you can customize it depending on how many available gpu devices you have
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from datasets.no_label_binary import NoLabelBinary

    # ---- Test-set transforms and loader ----
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])  # mean-only normalization
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    # Targets are video IDs so clip predictions can be grouped per video.
    target_transform = VideoID()  # ClassLabel()
    # get test data
    test_data = NoLabelBinary(
        opt.video_path,
        None,
        'testing',
        0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)
    # wrap test data (shuffle=False: output order must match video order)
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)

    # ### Extract Features
    # ##### 3D ResNeXt-101
    from models import resnext
    # construct model architecture
    model_rxt101 = resnext.resnet101(
        num_classes=opt.n_classes,
        shortcut_type=opt.resnet_shortcut,
        cardinality=opt.resnext_cardinality,
        sample_size=opt.sample_size,
        sample_duration=opt.sample_duration)
    model_rxt101 = model_rxt101.cuda()  # NOTE(review): assumes CUDA is available
    # wrap the current model again in nn.DataParallel / or we can just remove the .module keys.
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)

    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict)
    model_rxt101.load_state_dict(model_dict)

    # Capture avgpool activations via a forward hook; the classifier output
    # itself is discarded — only the pooled features are kept.
    activation = {}

    def get_activation(name):
        # Returns a hook that stores the layer output (detached) under `name`.
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rxt101.eval()

    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)  # output unused; hook fills `activation`
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            targets_test.append(target)
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy', avgpool_test_np)
    targets_test_np = np.concatenate(np.array(targets_test), axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy', targets_test_np)

    # ##### 3D ResNet-50 (same hook-based extraction as above)
    from models import resnet
    # construct model architecture
    model_rt50 = resnet.resnet50(
        num_classes=opt.n_classes,
        shortcut_type=opt.resnet_shortcut,
        sample_size=opt.sample_size,
        sample_duration=opt.sample_duration)
    model_rt50 = model_rt50.cuda()
    # wrap the current model again in nn.DataParallel / or we can just remove the .module keys.
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)

    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict)
    model_rt50.load_state_dict(model_dict)

    # register layer index to extract the features by forwarding all the video clips
    activation = {}

    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()

    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
    # save the features
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy', avgpool_test_np)

    # ### Load & fuse the features (concatenate along the feature axis)
    x_test_1 = np.load(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy')
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)
    y_test = np.load(opt.root_path + '/extracted_features/class_names_test.npy')

    # ### Load Classification head and predict
    # Each assignment selects a saved sklearn classifier and the matching
    # feature set (resnext-only, resnet-only, or fused).
    # NOTE(review): no final `else` — an unrecognized args.model leaves
    # y_pred_test_raw undefined and fails later with NameError.
    if args.model == 'hw4':
        # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)
    elif args.model == 'hw5':
        # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'hw6':
        # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)
    elif args.model == 'hw8':
        # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'final':
        # Final best model
        clf = load('./hw8_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    # Group consecutive rows of y_test by video name: split_idx holds the
    # start index of each run (clips of one video are contiguous).
    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx - 1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i + 1]]
            y_pred_test_prob[y_test[split]] = y_pred_test_prob_raw[split:split_idx[i + 1]]
            # Video-level prediction = argmax of the clip-averaged probabilities.
            y_pred_test_final[y_test[split]] = np.argmax(np.mean(y_pred_test_prob_raw[split:split_idx[i + 1]], axis=0))

    # ### Get the length (in seconds) of each video clip
    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}
    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]

    # ### Generate Figures: per-video timeline of the class-1 score.
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        # Seconds per clip segment = duration / number of clips for this video.
        interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)  # NOTE(review): unused
        y = y_pred_test_prob[tvn][:, 1]  # probability of class 1 per clip
        x = x[:len(y)]  # guard against arange producing one extra point
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label: " + tvn + "\n Model Avg. Predict Score: " + str(np.mean(y)))  # str(real_prediction_dict[tvn]['score'])
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' + args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()

    # ### Generate Json: {video: [[time_sec, score], ...]}
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]
            timeTrueLabel[tvn] = [[str(time), str(y[idx])] for idx, time in enumerate(x)]
    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model + '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
# Script entry point: resolve option paths, build the model, and set up the
# loss / normalization for training.
# NOTE(review): this chunk is truncated — the final `elif` has no body here.
if __name__ == '__main__':
    opt = parse_opts()
    # Make all configured paths absolute relative to root_path (when given).
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    # Persist resolved options for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    # print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Choose input normalization: none / mean-only / mean+std.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
def main():
    """Train a 3D-CNN classifier with optional k-fold-style cross-validation.

    Builds train/validation loaders (one pair per annotation file when
    cross-validation is enabled), trains for opt.n_epochs, keeps the
    checkpoint with the best validation accuracy, and saves loss/accuracy
    plots under opt.result_path.

    Fix: opt.result_path was set to
    ``os.path.join(opt.result_path, save_result_dir_name)`` although
    ``save_result_dir_name`` already includes opt.result_path — a double
    join pointing at a directory that was never created. It now uses
    ``save_result_dir_name`` directly (the directory that was mkdir'ed).
    """
    opt = parse_opts()

    # Path configurations
    opt.annotation_path = os.path.join(opt.annotation_directory, opt.annotation_path)
    save_result_dir_name = \
        os.path.join(opt.result_path,
                     get_prefix() + '_{}{}_{}_epochs'.format(opt.model, opt.model_depth, opt.n_epochs))
    if not os.path.exists(save_result_dir_name):
        os.mkdir(save_result_dir_name)
    # BUGFIX: previously os.path.join(opt.result_path, save_result_dir_name),
    # which duplicated the result_path prefix.
    opt.result_path = save_result_dir_name

    # For data generator: geometric series of crop scales.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):  # renamed from `epoch` — it indexes scales, not epochs
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)

    # Model
    model, parameters = generate_model(opt)
    # print(model)

    # Loss function
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    # Normalizing: mean/std when enabled, otherwise identity normalization.
    if not opt.no_mean_norm:
        opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
        opt.std = get_std(opt.norm_value, dataset=opt.std_dataset)
        norm_method = Normalize(opt.mean, opt.std)
    else:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print(opt)
    # Persist resolved options for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    # **************************** TRAINING CONFIGURATIONS ************************************
    assert opt.train_crop in ['corner', 'center']
    if opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c'])
    # Spatial transform (horizontal flip intentionally disabled).
    spatial_transform = Compose([
        crop_method,
        # RandomHorizontalFlip(),
        ToTensor(opt.norm_value),
        norm_method
    ])
    # Temporal transform
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    # Target transform
    target_transform = ClassLabel()

    # One training loader per annotation file when cross-validating.
    train_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory, annotation)
            training_data = get_training_set(opt, spatial_transform,
                                             temporal_transform, target_transform)
            train_loader = torch.utils.data.DataLoader(
                training_data,
                batch_size=opt.batch_size,
                shuffle=True,
                num_workers=opt.n_threads,
                pin_memory=True)
            train_loader_list.append(train_loader)
    else:
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_loader_list.append(train_loader)

    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=opt.dampening,
                          weight_decay=opt.weight_decay)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=opt.lr_patience)

    # ***************************** VALIDATION CONFIGURATIONS *********************************
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value),
        norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()

    val_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory, annotation)
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform, target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=opt.batch_size,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
            val_loader_list.append(val_loader)
    else:
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform, target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
        val_loader_list.append(val_loader)

    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    # **************************************** TRAINING ****************************************
    epoch_avg_time = AverageMeter()
    train_loss_list = []
    train_acc_list = []
    valid_acc_list = []
    best_accuracy = 0
    current_train_data = 0
    current_valid_data = 0
    # Switch to the next fold every ceil(n_epochs / n_sets) epochs.
    opt.frequence_cross_validation = round(opt.n_epochs / opt.n_cross_validation_sets + 0.5)

    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        epoch_start_time = time.time()
        print('Epoch #' + str(epoch))
        # optimizer = regulate_learning_rate(optimizer, epoch, opt.frequence_regulate_lr)

        train_loader = train_loader_list[current_train_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch training data #####')
            current_train_data = (current_train_data + 1) % len(train_loader_list)
            train_loader = train_loader_list[current_train_data]
        train_loss, train_acc = train_epoch(epoch, train_loader, model, criterion,
                                            optimizer, opt, train_logger, train_batch_logger)

        val_loader = val_loader_list[current_valid_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch validation data #####')
            current_valid_data = (current_valid_data + 1) % len(val_loader_list)
            val_loader = val_loader_list[current_valid_data]
        validation_acc = val_epoch(epoch, val_loader, model, criterion, opt, val_logger)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        valid_acc_list.append(validation_acc)

        # Save model with best accuracy
        if validation_acc > best_accuracy:
            best_accuracy = validation_acc
            save_file_path = os.path.join(opt.result_path, 'best_model.pth')
            states = {
                'epoch': epoch + 1,
                'arch': opt.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(states, save_file_path)

        epoch_end_time = time.time() - epoch_start_time
        epoch_avg_time.update(epoch_end_time)
        print('\tTime left: ' +
              str(round(epoch_avg_time.avg * (opt.n_epochs - epoch) / 60, 1)) +
              ' minutes')

    # ******************************* SAVING RESULTS OF TRAINING ******************************
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_loss_list, 'red',
                  'Loss', os.path.join(opt.result_path, 'train_loss.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_acc_list, 'blue',
                  'Accuracy', os.path.join(opt.result_path, 'train_accuracy.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), valid_acc_list, 'blue',
                  'Accuracy', os.path.join(opt.result_path, 'validation_accuracy.png'))