def test_params_resnet(full_pca_img, partial_pca_img, mask, loss_name,
                       grad_clipping, num_blocks, num_channels):
    # num_iter and dtype are assumed to be module-level globals
    start_time = time.time()
    img_var = np_to_torch(partial_pca_img).type(dtype)
    mask_var = np_to_torch(mask).type(dtype)

    LR = 0.01
    INPUT = 'noise'
    input_depth = partial_pca_img.shape[0]
    output_depth = partial_pca_img.shape[0]

    net = ResNet(input_depth, output_depth, num_blocks, num_channels,
                 act_fun='LeakyReLU')
    net = net.type(dtype)

    net_input = get_noise(input_depth, INPUT, partial_pca_img.shape[1:],
                          var=1).type(dtype)
    net_input_saved = net_input.detach().clone()
    noise = net_input.detach().clone()
    net_input = net_input_saved

    optimizer = torch.optim.AdamW(net.parameters(), lr=LR)
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, verbose=False, patience=100,
        threshold=0.0005, threshold_mode='rel', cooldown=0, min_lr=5e-6)

    for j in range(num_iter):
        out = net(net_input)
        optimizer.zero_grad()
        if loss_name == 'mse':
            mse = torch.nn.MSELoss().type(dtype)
            total_loss = mse(out * mask_var, img_var * mask_var)
        elif loss_name == 'master_metric':
            total_loss = -master_metric(out * mask_var, img_var * mask_var,
                                        1, 1, 1, 'product')
        else:
            raise ValueError("Input a correct loss name (among 'mse' | 'master_metric')")
        total_loss.backward()
        if grad_clipping:
            for param in net.parameters():
                param.grad.data.clamp_(-1, 1)
        optimizer.step()
        scheduler.step(total_loss)

    out_np = torch_to_np(out)
    elapsed = time.time() - start_time
    return get_final_metrics(out_np, full_pca_img), elapsed
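# np_to_torch / torch_to_np are used above but not defined in this snippet.
# A minimal sketch, assuming they follow the usual deep-image-prior
# convention of adding/stripping a leading batch axis:
def np_to_torch(img_np):
    # C x H x W numpy array -> 1 x C x H x W torch tensor
    return torch.from_numpy(img_np)[None, :]

def torch_to_np(img_var):
    # 1 x C x H x W torch tensor -> C x H x W numpy array
    return img_var.detach().cpu().numpy()[0]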
def main():
    loader = prepare_cifar10()
    last_epoch = 0

    # model = GoogleNet(mode='improved', aux=False).to(device)
    model = ResNet(layer_num='50').to(device)
    model_name = model.__class__.__name__ + '_' + model.mode
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)

    if pretrained is not None:
        print('load %s...' % pretrained)
        checkpoint = torch.load(os.path.join('./saved_models', pretrained))
        pattern = r'_[0-9]+\.'
        last_epoch = int(re.findall(pattern, pretrained)[-1][1:-1])
        if device.type == 'cuda':
            load_parallel_state_dict(model, checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('loading pretrained model finished')

    hyperparameters = {
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'num_epochs': num_epochs,
        'optimizer': optimizer,
        'loss_function': criterion
    }
    settings = {
        'print_every': print_every,
        'verbose': verbose,
        'save_log': is_log,
        'start_epoch': last_epoch + 1,
        'save_model': save_frequency,
        'name': model_name,
        'device': device
    }

    trainer = ResNetTrainer(model, loader, hyperparameters, settings)
    # trainer = GoogleNetTrainer(model, loader, hyperparameters, settings)
    if is_train:
        trainer.train()
    else:
        trainer.test()
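# Worked example of the checkpoint-name parsing above, assuming checkpoints
# are saved as '<name>_<epoch>.pth' (the file name here is hypothetical):
#   re.findall(r'_[0-9]+\.', 'ResNet_improved_120.pth')  ->  ['_120.']
#   '_120.'[1:-1]                                        ->  '120'
# so last_epoch is recovered as the integer 120.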
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True,
                                  num_workers=args.workers)
    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model")
    model = ResNet(
        num_classes=num_classes,
        depth=args.depth,
        norm_type=args.norm,
        basicblock=args.basicblock,
    )
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print(model)
    print(' Total params: %.4fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = set_optimizer(model, args)

    # Resume
    title = '{}-ResNet-{}-{}'.format(args.dataset, args.depth, args.norm)
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.', 'Train Acc.5', 'Valid Acc.5'])

    if args.evaluate:
        print('\nEvaluation only')
        # test returns (loss, top-1 acc, top-5 acc); the top-5 value is unused here
        test_loss, test_acc, test_acc5 = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc, train_acc5 = train(trainloader, model, criterion,
                                                  optimizer, epoch, use_cuda)
        test_loss, test_acc, test_acc5 = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc,
                       train_acc5, test_acc5])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()

    print('Best acc:')
    print(best_acc)
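# adjust_learning_rate is not defined in this snippet. A minimal sketch of
# the step-decay schedule the loop above appears to assume (args.schedule
# and args.gamma are assumed command-line options; state is the global dict
# read in the print and logger calls):
def adjust_learning_rate(optimizer, epoch):
    global state
    if epoch in args.schedule:
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']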
def main(args):
    transform = getTransforms()

    data_path = os.path.join('data', args.data)
    if not os.path.exists(data_path):
        print('ERROR: No dataset named {}'.format(args.data))
        exit(1)

    trainset = BaseDataset(list_path=os.path.join(data_path, 'train.lst'), transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.train_batch,
                                              shuffle=True, num_workers=1)
    testset = BaseDataset(list_path=os.path.join(data_path, 'val.lst'), transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.val_batch,
                                             shuffle=True, num_workers=1)

    model = ResNet(num_layers=18, num_classes=args.num_classes, pretrained=True).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    summary(model, input_size=(3, 32, 32))

    max_epoch = args.max_epoch
    last_epoch = 0
    best_val_loss = None
    best_accuracy = None
    train_losses = []
    val_losses = []
    accuracies = []

    output_dir = os.path.join('outputs', args.data)
    model_state_file = os.path.join(output_dir, 'checkpoint.pth.tar')
    os.makedirs(output_dir, exist_ok=True)
    if os.path.exists(model_state_file):
        checkpoint = torch.load(model_state_file)
        last_epoch = checkpoint['epoch']
        best_val_loss = checkpoint['best_val_loss']
        best_accuracy = checkpoint['best_accuracy']
        train_losses = checkpoint['train_losses']
        val_losses = checkpoint['val_losses']
        accuracies = checkpoint['accuracies']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('=> loaded checkpoint (epoch {})'.format(last_epoch))

    for epoch in range(last_epoch, max_epoch):
        print('Epoch {}'.format(epoch))
        train_loss = train(model=model, dataloader=trainloader, criterion=criterion,
                           optimizer=optimizer, device=DEVICE)
        val_loss = val(model=model, dataloader=testloader, criterion=criterion, device=DEVICE)
        accuracy = test(model=model, dataloader=testloader, device=DEVICE)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        accuracies.append(accuracy)
        print('Loss: train = {}, val = {}, acc. = {}'.format(train_loss, val_loss, accuracy))

        # if best_val_loss is None or val_loss < best_val_loss:
        #     best_val_loss = val_loss
        #     torch.save(
        #         model.state_dict(),
        #         os.path.join(output_dir, 'best.pth')
        #     )
        if best_accuracy is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), os.path.join(output_dir, 'best.pth'))

        print('=> saving checkpoint to {}'.format(model_state_file))
        torch.save({
            'epoch': epoch + 1,
            'best_val_loss': best_val_loss,
            'best_accuracy': best_accuracy,
            'train_losses': train_losses,
            'val_losses': val_losses,
            'accuracies': accuracies,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, model_state_file)

        if (epoch + 1) % 100 == 0:
            # plt.plot(range(epoch+1), train_losses, label="train")
            # plt.plot(range(epoch+1), val_losses, label="val")
            # plt.yscale('log')
            # plt.legend()
            # plt.savefig(os.path.join(output_dir, 'losses.png'))
            # plt.clf()
            fig, ax1 = plt.subplots()
            ax2 = ax1.twinx()
            ax1.plot(range(epoch + 1), train_losses, label='train')
            ax1.plot(range(epoch + 1), val_losses, label='val')
            ax1.set_xscale('log')
            ax1.set_yscale('log')
            ax2.plot(range(epoch + 1), accuracies, color='red', label='accuracy')
            ax2.set_xscale('log')
            handler1, label1 = ax1.get_legend_handles_labels()
            handler2, label2 = ax2.get_legend_handles_labels()
            ax1.legend(handler1 + handler2, label1 + label2, loc=3, borderaxespad=0.)
            plt.savefig(os.path.join(output_dir, 'losses.png'))
            plt.clf()
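# The train/val/test helpers called above are not shown. A minimal sketch of
# `train`, under the assumption that it returns the mean loss over the epoch
# (the exact reduction is an assumption):
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)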
def main():
    # for repeatable experiments
    cudnn.benchmark = False
    cudnn.deterministic = True
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)

    # options
    opt = Opts().parse()

    # dataset loader (train)
    if opt.dataset_train == 'h36m':
        train_loader = torch.utils.data.DataLoader(
            H36M17(opt.protocol, 'train', True, opt.scale, opt.noise,
                   opt.std_train, opt.std_test, opt.noise_path),
            batch_size=opt.batch_size,
            shuffle=True,
            num_workers=int(conf.num_threads))
    elif opt.dataset_train == 'inf':
        train_loader = torch.utils.data.DataLoader(
            MPIINF('train', opt.noise, opt.std_train, opt.std_test, opt.noise_path),
            batch_size=opt.batch_size,
            shuffle=True,
            num_workers=int(conf.num_threads))
    elif opt.dataset_train == 'h36m_inf':
        train_loader = torch.utils.data.DataLoader(
            H36M17_MPIINF('train', opt),
            batch_size=opt.batch_size,
            shuffle=True,
            num_workers=int(conf.num_threads))
    else:
        raise ValueError('unsupported dataset %s' % opt.dataset_train)

    # dataset loader (valid)
    if opt.dataset_test == 'h36m':
        val_loader = torch.utils.data.DataLoader(
            H36M17(opt.protocol, 'val', False, False, opt.noise,
                   opt.std_train, opt.std_test),
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(conf.num_threads))
    elif opt.dataset_test == 'inf':
        val_loader = torch.utils.data.DataLoader(
            MPIINF('val', opt.noise, opt.std_train, opt.std_test),
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(conf.num_threads))
    else:
        raise ValueError('unsupported dataset %s' % opt.dataset_test)

    # model
    if opt.network == 'resnet':
        model = ResNet(opt.mode, conf.num_joints, opt.num_layers, opt.num_features).cuda()
    else:
        raise ValueError('unsupported model %s' % opt.network)

    # multi-gpu
    if opt.multi_gpu:
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
    else:
        model = torch.nn.DataParallel(model, device_ids=[0])

    # optimizer
    if opt.opt_method == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=opt.lr)
    elif opt.opt_method == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    else:
        raise ValueError('unsupported optimizer %s' % opt.opt_method)

    # scheduler
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[200, 300], gamma=0.1)

    # log
    log = []
    log.append([])  # epoch
    log.append([])  # cost (train)
    log.append([])  # error3d1 (train)
    log.append([])  # error3d2 (train)
    log.append([])  # cost (val)
    log.append([])  # error3d1 (val)
    log.append([])  # error3d2 (val)

    # load model: resume from the latest checkpoint that exists on disk
    idx_start = opt.num_epochs
    while idx_start > 0:
        file_name = os.path.join(opt.save_dir, 'model_{}.pth'.format(idx_start))
        if os.path.exists(file_name):
            state = torch.load(file_name)
            model.load_state_dict(state['model'])
            optimizer.load_state_dict(state['optimizer'])
            scheduler.load_state_dict(state['scheduler'])
            log_name = os.path.join(opt.save_dir, 'log_{}.pkl'.format(idx_start))
            if os.path.exists(log_name):
                with open(log_name, 'rb') as fin:
                    log = pickle.load(fin)
            break
        idx_start -= 1

    # logger
    if idx_start == 0:
        logger = Logger(opt.save_dir + '/logs')
    else:
        logger = Logger(opt.save_dir + '/logs', reset=False)

    # train
    epoch = idx_start + 1
    for epoch in range(idx_start + 1, opt.num_epochs + 1):
        # for repeatable experiments
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed(epoch)

        # do scheduler
        scheduler.step()

        # perform one epoch of training
        cost_train, error3d1_train, error3d2_train = train(
            epoch, opt, train_loader, model, optimizer)
        logger.scalar_summary('cost_train', cost_train, epoch)
        logger.scalar_summary('error3d1_train', error3d1_train, epoch)
        logger.scalar_summary('error3d2_train', error3d2_train, epoch)

        # perform one epoch of validation
        with torch.no_grad():
            cost_val, error3d1_val, error3d2_val = val(epoch, opt, val_loader, model)
        logger.scalar_summary('cost_val', cost_val, epoch)
        logger.scalar_summary('error3d1_val', error3d1_val, epoch)
        logger.scalar_summary('error3d2_val', error3d2_val, epoch)

        # print message to log file
        logger.write('%d %1.1e | %.4f %.4f %.4f | %.4f %.4f %.4f\n' % (
            epoch, optimizer.param_groups[0]['lr'],
            cost_train, error3d1_train, error3d2_train,
            cost_val, error3d1_val, error3d2_val))

        # log[0].append(epoch)
        log[1].append(cost_train)
        log[2].append(error3d1_train)
        log[3].append(error3d2_train)
        log[4].append(cost_val)
        log[5].append(error3d1_val)
        log[6].append(error3d2_val)

        # save model
        state = {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        if epoch % opt.save_intervals == 0:
            torch.save(state, os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)))
            log_name = os.path.join(opt.save_dir, 'log_{}.pkl'.format(epoch))
            with open(log_name, 'wb') as fout:
                pickle.dump(log, fout)

    logger.close()

    # save final model
    file_name = os.path.join(opt.save_dir, 'final_model.pth')
    torch.save(state, file_name)

    # save final log
    log_name = os.path.join(opt.save_dir, 'final_log.pkl')
    with open(log_name, 'wb') as fout:
        pickle.dump(log, fout)

    # plotting
    x = range(1, opt.num_epochs + 1)
    cost_train = np.array(log[1])
    error3d1_train = np.array(log[2])
    error3d2_train = np.array(log[3])
    cost_val = np.array(log[4])
    error3d1_val = np.array(log[5])
    error3d2_val = np.array(log[6])

    fig, ax = plt.subplots()
    ax.plot(x, cost_train, 'r')
    ax.plot(x, cost_val, 'b')
    ax.set(xlabel='epoch', ylabel='cost', title='cost')
    plt.legend(('cost_train', 'cost_val'))
    ax.grid()
    fig.savefig(os.path.join(opt.save_dir, 'cost.png'))

    fig, ax = plt.subplots()
    ax.plot(x, error3d1_train, 'r')
    ax.plot(x, error3d2_train, 'm')
    ax.plot(x, error3d1_val, 'b')
    ax.plot(x, error3d2_val, 'c')
    ax.set(xlabel='epoch', ylabel='error3d', title='3D error (mm)')
    plt.legend(('error3d1_train', 'error3d2_train', 'error3d1_val', 'error3d2_val'))
    ax.grid()
    fig.savefig(os.path.join(opt.save_dir, 'error3d.png'))

    # ---------------------------------------------------------------------
    # dataset loader (test)
    if opt.dataset_test == 'h36m':
        test_loader = torch.utils.data.DataLoader(
            H36M17(opt.protocol, 'test', True, False, opt.noise,
                   opt.std_train, opt.std_test),
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(conf.num_threads))
    elif opt.dataset_test == 'inf':
        test_loader = torch.utils.data.DataLoader(
            MPIINF('val', opt.noise, opt.std_train, opt.std_test),
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(conf.num_threads))
    else:
        raise ValueError('unsupported dataset %s' % opt.dataset_test)

    # final evaluation
    with torch.no_grad():
        cost_final, error3d1_final, error3d2_final = test(epoch, opt, test_loader, model)
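# A minimal sketch of reloading the final checkpoint saved above in a later
# evaluation script (the dict keys match the `state` dict this script saves;
# `model` and `opt` are assumed to be constructed as in main()):
state = torch.load(os.path.join(opt.save_dir, 'final_model.pth'))
model.load_state_dict(state['model'])
model.eval()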
# model = Shallow()
# model = LargeResNet()
model = ResNet()
model = model.to(device)

# Load saved model parameters (if pre-trained)
if not train_mode:
    map_loc = "cuda:0" if torch.cuda.is_available() else "cpu"
    state_dict = torch.load(os.path.join(weight_path, "resnet_lr4_ep80"),
                            map_location=map_loc)
    model.load_state_dict(state_dict)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

# Learning rate scheduler
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# Train one epoch
def train(epoch):
    model.train()
    train_loss = 0
    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)
        pred = model(data)
        optimizer.zero_grad()
        loss = criterion(pred, label.long())
        loss.backward()
        # the original snippet is cut off after backward(); the standard
        # completion (an assumption) steps the optimizer and tracks the loss
        optimizer.step()
        train_loss += loss.item()
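# A minimal driver loop consistent with the pieces above (assumption:
# `epochs` and `train_loader` are defined earlier in the full script):
for epoch in range(epochs):
    train(epoch)
    scheduler.step()  # advance the cosine-annealing schedule once per epoch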
# creating the data-loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           num_workers=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                          num_workers=4, shuffle=False)

# initializing our network
net = ResNet(args.depth, in_channels=1, output=3)
net.apply(conv_init)
print(net)
if is_use_cuda:
    net.to(device)
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))

# change loss criterion
criterion = nn.L1Loss()
print("Number of parameters in the network: {}".format(
    sum(p.numel() for p in net.parameters())))

def train(epoch):
    net.train()
    train_loss = 0
    # the learning rate is set manually each epoch by rebuilding the optimizer
    optimizer = optim.Adam(net.parameters(), lr=lr_schedule(lr, epoch))
    print('Training Epoch: #%d, LR: %.5f' % (epoch, lr_schedule(lr, epoch)))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        # the original snippet is cut off after backward(); the standard
        # completion (an assumption) steps the optimizer and tracks the loss
        optimizer.step()
        train_loss += loss.item()
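# lr_schedule is referenced above but not defined in this snippet. A
# plausible step-decay sketch (the decay epochs and factors here are
# assumptions, not the original schedule):
def lr_schedule(base_lr, epoch):
    factor = 1.0
    if epoch >= 60:
        factor = 0.01
    elif epoch >= 30:
        factor = 0.1
    return base_lr * factor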