transform_post = ComposeMix([
    [torchvision.transforms.ToTensor(), "img"],
    [torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # default values for ImageNet
        std=[0.229, 0.224, 0.225]), "img"],
])

val_data = VideoFolder(
    root=config['data_folder'],
    json_file_input=config['json_data_val'],
    json_file_labels=config['json_file_labels'],
    clip_size=config['clip_size'],
    nclips=config['nclips_val'],
    step_size=config['step_size_val'],
    is_val=True,
    transform_pre=transform_eval_pre,
    transform_post=transform_post,
    get_item_id=True,
)
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=True,
    drop_last=False)

model.eval()
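# `ComposeMix` comes from the repo's data-augmentation utilities. A minimal
# sketch of the contract the pipelines above assume (an assumption inferred
# from the call sites, not the repo's exact implementation): each entry is a
# [transform, kind] pair, where kind == "img" applies the transform to every
# frame of the clip and kind == "vid" applies it to the clip as a whole.
class ComposeMix(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, imgs):
        for transform, kind in self.transforms:
            if kind == "img":
                # per-frame transform (e.g. ToTensor, Normalize)
                imgs = [transform(img) for img in imgs]
            elif kind == "vid":
                # whole-clip transform (e.g. RandomCropVideo)
                imgs = transform(imgs)
            else:
                raise ValueError("Unknown transform kind: {}".format(kind))
        return imgs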
def main():
    global args, best_loss

    # set run output folder
    model_name = config["model_name"]
    output_dir = config["output_dir"]
    save_dir = os.path.join(output_dir, model_name)
    print(" > Output folder for this run -- {}".format(save_dir))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        os.makedirs(os.path.join(save_dir, 'plots'))

    # assign Ctrl+C signal handler
    signal.signal(signal.SIGINT, ExperimentalRunCleaner(save_dir))

    # create model
    print(" > Creating model ... !")
    model = MultiColumn(config['num_classes'], cnn_def.Model,
                        int(config["column_units"]))

    # multi GPU setting
    model = torch.nn.DataParallel(model, device_ids).to(device)

    # define optimizer
    lr = config["lr"]
    last_lr = config["last_lr"]
    momentum = config['momentum']
    weight_decay = config['weight_decay']
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    lr_decayer = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.5, patience=2, verbose=True)

    # optionally resume from a checkpoint
    checkpoint_path = os.path.join(config['output_dir'],
                                   config['model_name'],
                                   'model_best.pth.tar')
    if args.resume:
        if os.path.isfile(checkpoint_path):
            print(" > Loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            lr_decayer.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # for state in optimizer.state.values():
            #     for k, v in state.items():
            #         if isinstance(v, torch.Tensor):
            #             state[k] = v.to(device)
            print(" > Loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
        else:
            print(" !#! No checkpoint found at '{}'".format(checkpoint_path))
    elif config.get('finetune_from') is not None:
        print(' > Loading checkpoint to finetune')
        finetune_model_name = config['finetune_from']
        checkpoint_path = os.path.join(config['output_dir'],
                                       finetune_model_name,
                                       'model_best.pth.tar')
        checkpoint = torch.load(checkpoint_path)
        # load with the source head (174 classes), then swap in a fresh head
        # sized for the target dataset
        model.module.clf_layers = nn.Sequential(
            nn.Linear(model.module.column_units, 174)).to(device)
        model.load_state_dict(checkpoint['state_dict'])
        model.module.clf_layers = nn.Sequential(
            nn.Linear(model.module.column_units,
                      config['num_classes'])).to(device)
        print(" > Loaded checkpoint '{}' (epoch {})".format(
            checkpoint_path, checkpoint['epoch']))

        # Freeze first 3 blocks
        for param in model.module.conv_column.block1.parameters():
            param.requires_grad = False
        for param in model.module.conv_column.block2.parameters():
            param.requires_grad = False
        for param in model.module.conv_column.block3.parameters():
            param.requires_grad = False

    # define augmentation pipeline
    upscale_size_train = int(config['input_spatial_size'] *
                             config["upscale_factor_train"])
    upscale_size_eval = int(config['input_spatial_size'] *
                            config["upscale_factor_eval"])

    # Random crop videos during training
    transform_train_pre = ComposeMix([
        [RandomRotationVideo(15), "vid"],
        [Scale(upscale_size_train), "img"],
        [RandomCropVideo(config['input_spatial_size']), "vid"],
    ])

    # Center crop videos during evaluation
    transform_eval_pre = ComposeMix([
        [Scale(upscale_size_eval), "img"],
        [torchvision.transforms.ToPILImage(), "img"],
        [torchvision.transforms.CenterCrop(config['input_spatial_size']),
         "img"],
    ])

    # Transforms common to train and eval sets and applied after "pre" transforms
    transform_post = ComposeMix([
        [torchvision.transforms.ToTensor(), "img"],
        [torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # default values for ImageNet
            std=[0.229, 0.224, 0.225]), "img"],
    ])

    train_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_train'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_train'],
        step_size=config['step_size_train'],
        is_val=False,
        transform_pre=transform_train_pre,
        transform_post=transform_post,
        augmentation_mappings_json=config['augmentation_mappings_json'],
        augmentation_types_todo=config['augmentation_types_todo'],
        get_item_id=False,
    )

    print(" > Using {} processes for data loader.".format(
        config["num_workers"]))

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=True)

    val_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_val'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_val'],
        step_size=config['step_size_val'],
        is_val=True,
        transform_pre=transform_eval_pre,
        transform_post=transform_post,
        get_item_id=True,
    )

    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    test_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_test'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_val'],
        step_size=config['step_size_val'],
        is_val=True,
        transform_pre=transform_eval_pre,
        transform_post=transform_post,
        get_item_id=True,
        is_test=True,
    )

    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    print(" > Number of dataset classes : {}".format(len(train_data.classes)))
    assert len(train_data.classes) == config["num_classes"]

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device)

    if args.eval_only:
        validate(val_loader, model, criterion, train_data.classes_dict)
        print(" > Evaluation DONE !")
        return

    # set callbacks
    plotter = PlotLearning(os.path.join(save_dir, "plots"),
                           config["num_classes"])
    val_loss = float('Inf')

    # set end condition by num epochs
    num_epochs = int(config["num_epochs"])
    if num_epochs == -1:
        num_epochs = 999999

    print(" > Training is getting started...")
    print(" > Training takes {} epochs.".format(num_epochs))
    start_epoch = args.start_epoch if args.resume else 0

    for epoch in range(start_epoch, num_epochs):
        lrs = [params['lr'] for params in optimizer.param_groups]
        print(" > Current LR(s) -- {}".format(lrs))
        if np.max(lrs) < last_lr and last_lr > 0:
            print(" > Training is DONE by learning rate {}".format(last_lr))
            sys.exit(1)

        with experiment.train():
            # train for one epoch
            train_loss, train_top1, train_top5 = train(
                train_loader, model, criterion, optimizer, epoch)
            metrics = {
                'avg_loss': train_loss,
                'avg_top1': train_top1,
                'avg_top5': train_top5,
            }
            experiment.log_metrics(metrics)

        with experiment.validate():
            # evaluate on validation set
            val_loss, val_top1, val_top5 = validate(val_loader, model,
                                                    criterion)
            metrics = {
                'avg_loss': val_loss,
                'avg_top1': val_top1,
                'avg_top5': val_top5,
            }
            experiment.log_metrics(metrics)

        experiment.log_metric('epoch', epoch)

        # set learning rate
        lr_decayer.step(val_loss, epoch)

        # plot learning
        plotter_dict = {}
        plotter_dict['loss'] = train_loss
        plotter_dict['val_loss'] = val_loss
        plotter_dict['acc'] = train_top1 / 100
        plotter_dict['val_acc'] = val_top1 / 100
        plotter_dict['learning_rate'] = lrs[0]
        plotter.plot(plotter_dict)

        print(" > Validation loss after epoch {} = {}".format(epoch, val_loss))

        # remember best loss and save the checkpoint
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': "Conv4Col",
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': lr_decayer.state_dict(),
                'best_loss': best_loss,
            }, is_best, config)
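# `save_checkpoint` is defined elsewhere in the repo. A minimal sketch of what
# the call site above assumes (a hypothetical version modeled on the standard
# PyTorch ImageNet recipe, not necessarily the repo's implementation): persist
# the latest state every epoch and copy it to model_best.pth.tar when the
# validation loss improves, matching the path the resume logic reads from.
import os
import shutil
import torch

def save_checkpoint(state, is_best, config, filename='checkpoint.pth.tar'):
    checkpoint_dir = os.path.join(config['output_dir'], config['model_name'])
    checkpoint_path = os.path.join(checkpoint_dir, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path,
                        os.path.join(checkpoint_dir, 'model_best.pth.tar'))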
def main():
    global args, best_loss

    # set run output folder
    model_name = config["model_name"]
    output_dir = config["output_dir"]
    save_dir = os.path.join(output_dir, model_name)

    # assign Ctrl+C signal handler
    signal.signal(signal.SIGINT, ExperimentalRunCleaner(save_dir))

    # load per-layer transfer-entropy scores for this model
    with open(f'transfer_entropy_{model_name[:-8]}.toml', 'r') as f:
        entropy_dict = toml.load(f)
    score = np.array([float(value) for value in entropy_dict.values()])
    d_rate = drate
    mode = args.mode

    # create model
    print(" > Creating model ... !")
    model = cnn_def.Model(config['num_classes'], score, d_rate,
                          mode).to(device)

    # optionally resume from a checkpoint
    checkpoint_path = os.path.join(config['output_dir'],
                                   config['model_name'],
                                   'model_best.pth.tar')
    start_epoch = 0
    if os.path.isfile(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch']
        best_loss = checkpoint['best_loss']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        print(" > Loaded checkpoint '{}' (epoch {})".format(
            checkpoint_path, checkpoint['epoch']))
    else:
        print(" !#! No checkpoint found at '{}'".format(checkpoint_path))

    # define augmentation pipeline
    upscale_size_train = int(config['input_spatial_size'] *
                             config["upscale_factor_train"])
    upscale_size_eval = int(config['input_spatial_size'] *
                            config["upscale_factor_eval"])

    # Random crop videos during training
    transform_train_pre = ComposeMix([
        [RandomRotationVideo(15), "vid"],
        [Scale(upscale_size_train), "img"],
        [RandomCropVideo(config['input_spatial_size']), "vid"],
    ])

    # Center crop videos during evaluation
    transform_eval_pre = ComposeMix([
        [Scale(upscale_size_eval), "img"],
        [torchvision.transforms.ToPILImage(), "img"],
        [torchvision.transforms.CenterCrop(config['input_spatial_size']),
         "img"],
    ])

    # Transforms common to train and eval sets and applied after "pre" transforms
    transform_post = ComposeMix([
        [torchvision.transforms.ToTensor(), "img"],
        [torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # default values for ImageNet
            std=[0.229, 0.224, 0.225]), "img"],
    ])

    train_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_train'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_train'],
        step_size=config['step_size_train'],
        is_val=False,
        transform_pre=transform_train_pre,
        transform_post=transform_post,
        augmentation_mappings_json=config['augmentation_mappings_json'],
        augmentation_types_todo=config['augmentation_types_todo'],
        get_item_id=False,
    )

    print(" > Using {} processes for data loader.".format(
        config["num_workers"]))

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=True)

    val_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_val'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_val'],
        step_size=config['step_size_val'],
        is_val=True,
        transform_pre=transform_eval_pre,
        transform_post=transform_post,
        get_item_id=True,
    )

    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    test_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_test'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_val'],
        step_size=config['step_size_val'],
        is_val=True,
        transform_pre=transform_eval_pre,
        transform_post=transform_post,
        get_item_id=True,
        is_test=True,
    )

    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    print(" > Number of dataset classes : {}".format(len(train_data.classes)))
    # assert len(train_data.classes) == config["num_classes"]

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device)

    # define optimizer
    lr = config["lr"]
    last_lr = config["last_lr"]
    momentum = config['momentum']
    weight_decay = config['weight_decay']
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay)

    # set callbacks
    lr_decayer = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.5, patience=2, verbose=True)
    val_loss = float('Inf')
    best_top1 = 0.0

    # set end condition by num epochs
    num_epochs = 60
    if num_epochs == -1:
        num_epochs = 999999

    print(" > Training is getting started...")
    print(" > Training takes {} epochs.".format(num_epochs))

    for epoch in range(start_epoch, num_epochs):
        lrs = [params['lr'] for params in optimizer.param_groups]
        print(" > Current LR(s) -- {}".format(lrs))
        if np.max(lrs) < last_lr and last_lr > 0:
            print(" > Training is DONE by learning rate {}".format(last_lr))
            sys.exit(1)

        # train for one epoch
        train_loss, train_top1, train_top5 = train(
            train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)

        # set learning rate
        lr_decayer.step(val_loss, epoch)

        # record the best top-1 accuracy for this config/mode/rate
        if best_top1 < val_top1:
            best_top1 = val_top1
            acc_filename = 'retrain_te_acc.toml'
            with open(acc_filename, 'r') as f:
                accs = toml.load(f)
            accs[f'{config_name}_{args.mode}_{str(d_rate)}'] = best_top1
            with open(acc_filename, 'w') as f:
                toml.dump(accs, f)
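# `ExperimentalRunCleaner` is the Ctrl+C handler installed in each main()
# above. A minimal sketch of the assumed behavior (an assumption based on the
# name and call site, not the repo's exact code): on SIGINT, remove the run's
# output folder if it holds no results yet, so aborted runs leave no clutter.
import glob
import os
import shutil
import sys

class ExperimentalRunCleaner(object):
    def __init__(self, save_dir):
        self.save_dir = save_dir

    def __call__(self, signum, frame):
        # only delete if the run produced (almost) nothing
        if len(glob.glob(os.path.join(self.save_dir, '*'))) < 1:
            print('Removing: {}'.format(self.save_dir))
            shutil.rmtree(self.save_dir, ignore_errors=True)
        sys.exit(0)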
def main():
    global args, best_loss

    # set run output folder
    config['model_id'] = '_'.join([config["model_name"], args.job_identifier])
    wandb.init(project="cross-dataset-generalization", config=config)
    output_dir = config["output_dir"]
    save_dir = os.path.join(output_dir, config['model_id'])
    print(" > Output folder for this run -- {}".format(save_dir))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        os.makedirs(os.path.join(save_dir, 'plots'))

    # assign Ctrl+C signal handler
    signal.signal(signal.SIGINT, utils.ExperimentalRunCleaner(save_dir))

    # create model
    print(" > Creating model ... !")
    if '3D' in config['model_name']:
        model = MultiColumn(config['num_classes'], model_def.Model,
                            int(config["column_units"]))
        # multi GPU setting
        model = torch.nn.DataParallel(model, device_ids).to(device)
        input_size = (config['batch_size'], 3, config['clip_size'],
                      config['input_spatial_size'],
                      config['input_spatial_size'])
        seq_first = False
    else:
        model = model_def.ConvLSTMModel(config=config)
        input_size = (config['clip_size'], config['batch_size'], 3,
                      config['input_spatial_size'],
                      config['input_spatial_size'])
        seq_first = True

    # Print model summary
    # ts_summary(model, input_size=input_size)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(config['checkpoint_path']):
            print(" > Loading checkpoint '{}'".format(
                config['checkpoint_path']))
            checkpoint = torch.load(config['checkpoint_path'])
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            print(" > Loaded checkpoint '{}' (epoch {})".format(
                config['checkpoint_path'], checkpoint['epoch']))
        else:
            print(" !#! No checkpoint found at '{}'".format(
                config['checkpoint_path']))

    # define augmentation pipeline
    upscale_size_train = int(config['input_spatial_size'] *
                             config["upscale_factor_train"])
    upscale_size_eval = int(config['input_spatial_size'] *
                            config["upscale_factor_eval"])

    # Random crop videos during training
    transform_train_pre = ComposeMix([
        [RandomRotationVideo(15), "vid"],
        [Scale(upscale_size_train), "img"],
        [RandomCropVideo(config['input_spatial_size']), "vid"],
    ])

    # Center crop videos during evaluation
    transform_eval_pre = ComposeMix([
        [Scale(upscale_size_eval), "img"],
        [torchvision.transforms.ToPILImage(), "img"],
        [torchvision.transforms.CenterCrop(config['input_spatial_size']),
         "img"],
    ])

    # Transforms common to train and eval sets and applied after "pre" transforms
    transform_post = ComposeMix([
        [torchvision.transforms.ToTensor(), "img"],
        [torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # default values for ImageNet
            std=[0.229, 0.224, 0.225]), "img"],
    ])

    train_val_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_train'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_train_val'],
        step_size=config['step_size_train_val'],
        is_val=False,
        transform_pre=transform_train_pre,
        transform_post=transform_post,
        augmentation_mappings_json=config['augmentation_mappings_json'],
        augmentation_types_todo=config['augmentation_types_todo'],
        get_item_id=True,
        seq_first=seq_first)

    # fixed-seed split so the train/val partition is reproducible across runs
    train_data, val_data = torch.utils.data.random_split(
        train_val_data,
        [config['nb_train_samples'], config['nb_val_samples']],
        generator=torch.Generator().manual_seed(42))

    print(" > Using {} processes for data loader.".format(
        config["num_workers"]))

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    test_data = VideoFolder(
        root=config['data_folder'],
        json_file_input=config['json_data_test'],
        json_file_labels=config['json_file_labels'],
        clip_size=config['clip_size'],
        nclips=config['nclips_test'],
        step_size=config['step_size_test'],
        is_val=True,
        transform_pre=transform_eval_pre,
        transform_post=transform_post,
        get_item_id=True,
        is_test=True,
    )

    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        drop_last=False)

    # print(" > Number of dataset classes : {}".format(len(train_data.classes)))
    # assert len(train_data.classes) == config["num_classes"]

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device)

    # define optimizer
    lr = config["lr"]
    last_lr = config["last_lr"]
    momentum = config['momentum']
    weight_decay = config['weight_decay']
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    if args.eval_only:
        # train_data is a Subset after random_split; the class mapping lives
        # on the underlying VideoFolder
        validate(test_loader, model, criterion, train_val_data.classes_dict)
        print(" > Evaluation DONE !")
        return

    # set callbacks
    # plotter = PlotLearning(os.path.join(
    #     save_dir, "plots"), config["num_classes"])
    lr_decayer = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.5, patience=2, verbose=True)
    val_loss = float('Inf')

    # set end condition by num epochs
    num_epochs = int(config["num_epochs"])
    if num_epochs == -1:
        num_epochs = 999999

    print(" > Training is getting started...")
    print(" > Training takes {} epochs.".format(num_epochs))
    start_epoch = args.start_epoch if args.resume else 0

    for epoch in range(start_epoch, num_epochs):
        lrs = [params['lr'] for params in optimizer.param_groups]
        print(" > Current LR(s) -- {}".format(lrs))
        if np.max(lrs) < last_lr and last_lr > 0:
            print(" > Training is DONE by learning rate {}".format(last_lr))
            sys.exit(1)

        wandb.log({'epoch': epoch})

        # train for one epoch
        train_loss, train_top1, train_top5 = train(train_loader, model,
                                                   criterion, optimizer, epoch)

        # evaluate on validation set
        val_loss, val_top1, val_top5 = validate(val_loader, model, criterion,
                                                which_split='val')

        # set learning rate
        lr_decayer.step(val_loss, epoch)

        # # plot learning
        # plotter_dict = {}
        # plotter_dict['loss'] = train_loss
        # plotter_dict['val_loss'] = val_loss
        # plotter_dict['acc'] = train_top1 / 100
        # plotter_dict['val_acc'] = val_top1 / 100
        # plotter_dict['learning_rate'] = lr
        # plotter.plot(plotter_dict)

        print(" > Validation loss after epoch {} = {}".format(epoch, val_loss))

        # remember best loss and save the checkpoint
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': "Conv4Col",
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
            }, is_best, config)

    # final evaluation on the held-out test split
    test_loss, test_top1, test_top5 = validate(test_loader, model, criterion,
                                               which_split='test')
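# `train` and `validate` return (loss, top1, top5) percentages. A minimal
# sketch of the top-k accuracy helper they are assumed to share (the standard
# computation from the PyTorch examples; the repo's own helper may differ):
import torch

def accuracy(output, target, topk=(1,)):
    """Compute precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the k highest-scoring classes per sample, shape (maxk, B)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res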