import os
import shutil

import torch
import torch.nn as nn
import torch.utils.data
from torchvision import transforms

# Project-local helpers used below (CachedImageFolder, CustomResNet,
# FiltDoubleBackpropLoss, AverageMeter, default_progress, post_progress,
# print_progress, ensure_dir_for, IMAGE_MEAN, IMAGE_STDEV) are assumed to be
# imported from this repository's own modules.


def main():
    progress = default_progress()
    experiment_dir = 'experiment/filt4_resnet'
    # Here's our data
    train_loader = torch.utils.data.DataLoader(
        CachedImageFolder(
            'dataset/miniplaces/simple/train',
            transform=transforms.Compose([
                transforms.Resize(128),
                transforms.RandomCrop(112),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(IMAGE_MEAN, IMAGE_STDEV),
            ])),
        batch_size=32, shuffle=True,
        num_workers=24, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CachedImageFolder(
            'dataset/miniplaces/simple/val',
            transform=transforms.Compose([
                transforms.Resize(128),
                # transforms.CenterCrop(112),
                transforms.ToTensor(),
                transforms.Normalize(IMAGE_MEAN, IMAGE_STDEV),
            ])),
        batch_size=32, shuffle=False,
        num_workers=24, pin_memory=True)
    # Create a simplified ResNet with half resolution.
    model = CustomResNet(18, num_classes=100, halfsize=True,
                         extra_output=['maxpool'])  # right after conv1
    model.train()
    model.cuda()

    # An abbreviated training schedule: 40000 batches.
    # TODO: tune these hyperparameters.
    # init_lr = 0.002
    init_lr = 1e-4
    # max_iter = 40000 - 34.5% @1
    # max_iter = 50000 - 37% @1
    # max_iter = 80000 - 39.7% @1
    # max_iter = 100000 - 40.1% @1
    max_iter = 50000
    criterion = FiltDoubleBackpropLoss(1e4)
    optimizer = torch.optim.Adam(model.parameters())
    iter_num = 0
    best = dict(val_accuracy=0.0)
    model.train()
    # Oh, hold on. Let's actually resume training if we already have a model.
    checkpoint_filename = 'miniplaces.pth.tar'
    best_filename = 'best_%s' % checkpoint_filename
    best_checkpoint = os.path.join(experiment_dir, best_filename)
    try_to_resume_training = False
    if try_to_resume_training and os.path.exists(best_checkpoint):
        checkpoint = torch.load(os.path.join(experiment_dir, best_filename))
        iter_num = checkpoint['iter']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best['val_accuracy'] = checkpoint['accuracy']

    def save_checkpoint(state, is_best):
        filename = os.path.join(experiment_dir, checkpoint_filename)
        ensure_dir_for(filename)
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            os.path.join(experiment_dir, best_filename))

    def validate_and_checkpoint():
        model.eval()
        # val_loss, val_acc = AverageMeter(), AverageMeter()
        val_acc = AverageMeter()
        for input, target in progress(val_loader):
            # Load data
            input_var, target_var = [d.cuda() for d in [input, target]]
            # Evaluate model
            with torch.no_grad():
                output = model(input_var)
                # loss, unreg_loss = criterion(output, target_var)
                _, pred = output[0].max(1)
                accuracy = (target_var.eq(pred)
                            ).data.float().sum().item() / input.size(0)
            # val_loss.update(loss.data.item(), input.size(0))
            val_acc.update(accuracy, input.size(0))
            # Check accuracy
            # post_progress(l=val_loss.avg, a=val_acc.avg*100.0)
            post_progress(a=val_acc.avg * 100.0)
        # Save checkpoint
        save_checkpoint({
            'iter': iter_num,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'accuracy': val_acc.avg,
            # 'loss': val_loss.avg,
        }, val_acc.avg > best['val_accuracy'])
        best['val_accuracy'] = max(val_acc.avg, best['val_accuracy'])
        print_progress('Iteration %d val accuracy %.2f' %
                       (iter_num, val_acc.avg * 100.0))

    # Here is our training loop.
    while iter_num < max_iter:
        for input, target in progress(train_loader):
            # Track the average training loss/accuracy for each epoch.
            train_loss, train_acc = AverageMeter(), AverageMeter()
            train_loss_u = AverageMeter()
            train_loss_g = AverageMeter()
            # Load data
            input_var, target_var = [d.cuda() for d in [input, target]]
            # Evaluate model
            output = model(input_var)
            loss, unreg_loss, grad_loss = criterion(output, target_var)
            train_loss.update(loss.data.item(), input.size(0))
            train_loss_u.update(unreg_loss.data.item(), input.size(0))
            train_loss_g.update(grad_loss.data.item(), input.size(0))
            # Perform one step of SGD
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Also check training set accuracy
            _, pred = output[0].max(1)
            accuracy = (target_var.eq(pred)).data.float().sum().item() / (
                input.size(0))
            train_acc.update(accuracy)
            remaining = 1 - iter_num / float(max_iter)
            post_progress(g=train_loss_g.avg,
                          u=train_loss_u.avg,
                          a=train_acc.avg * 100.0)
            # Advance
            iter_num += 1
            if iter_num >= max_iter:
                break
            # Linear learning rate decay
            lr = init_lr * remaining
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            # Occasionally check validation set accuracy and checkpoint
            if iter_num % 1000 == 0:
                validate_and_checkpoint()
                model.train()
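
# FiltDoubleBackpropLoss is defined elsewhere in this project and is not shown
# here.  The class below is only a hypothetical sketch of a double-backprop
# style criterion that matches how it is called above: the model returns
# (logits, maxpool_activation) because of extra_output=['maxpool'], and the
# criterion returns (total_loss, unregularized_loss, gradient_penalty).  The
# penalty discourages large gradients of the classification loss with respect
# to the early 'maxpool' feature map.

class DoubleBackpropLossSketch(nn.Module):
    def __init__(self, grad_coeff=1e4):
        super().__init__()
        self.grad_coeff = grad_coeff
        self.cross_entropy = nn.CrossEntropyLoss()

    def forward(self, output, target):
        logits, early_feature = output
        unreg_loss = self.cross_entropy(logits, target)
        # Differentiate the unregularized loss w.r.t. the early feature map,
        # keeping the graph so the penalty itself can be backpropagated.
        grad, = torch.autograd.grad(unreg_loss, early_feature,
                                    create_graph=True)
        grad_loss = self.grad_coeff * grad.pow(2).sum()
        return unreg_loss + grad_loss, unreg_loss, grad_loss
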
def main():
    progress = default_progress()
    experiment_dir = 'experiment/positive_resnet'
    # Here's our data
    train_loader = torch.utils.data.DataLoader(
        CachedImageFolder(
            'dataset/miniplaces/simple/train',
            transform=transforms.Compose([
                transforms.Resize(128),
                transforms.RandomCrop(112),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(IMAGE_MEAN, IMAGE_STDEV),
            ])),
        batch_size=32, shuffle=True,
        num_workers=24, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CachedImageFolder(
            'dataset/miniplaces/simple/val',
            transform=transforms.Compose([
                transforms.Resize(128),
                # transforms.CenterCrop(112),
                transforms.ToTensor(),
                transforms.Normalize(IMAGE_MEAN, IMAGE_STDEV),
            ])),
        batch_size=32, shuffle=False,
        num_workers=24, pin_memory=True)
    # Create a simplified ResNet with half resolution.
    model = CustomResNet(18, num_classes=100, halfsize=True)
    checkpoint_filename = 'best_miniplaces.pth.tar'
    best_checkpoint = os.path.join('experiment/resnet', checkpoint_filename)
    checkpoint = torch.load(best_checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    model.train()
    model.cuda()

    # An abbreviated training schedule: 40000 batches.
    # TODO: tune these hyperparameters.
    # init_lr = 0.002
    init_lr = 1e-4
    # max_iter = 40000 - 34.5% @1
    # max_iter = 50000 - 37% @1
    # max_iter = 80000 - 39.7% @1
    # max_iter = 100000 - 40.1% @1
    max_iter = 50000
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters())
    iter_num = 0
    best = dict(val_accuracy=0.0)
    model.train()
    # Oh, hold on. Let's actually resume training if we already have a model.
    checkpoint_filename = 'miniplaces.pth.tar'
    best_filename = 'best_%s' % checkpoint_filename
    best_checkpoint = os.path.join(experiment_dir, best_filename)
    try_to_resume_training = False
    if try_to_resume_training and os.path.exists(best_checkpoint):
        checkpoint = torch.load(os.path.join(experiment_dir, best_filename))
        iter_num = checkpoint['iter']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best['val_accuracy'] = checkpoint['accuracy']

    def save_checkpoint(state, is_best):
        filename = os.path.join(experiment_dir, checkpoint_filename)
        ensure_dir_for(filename)
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            os.path.join(experiment_dir, best_filename))

    def validate_and_checkpoint():
        model.eval()
        val_loss, val_acc = AverageMeter(), AverageMeter()
        for input, target in progress(val_loader):
            # Load data
            input_var, target_var = [d.cuda() for d in [input, target]]
            # Evaluate model
            with torch.no_grad():
                output = model(input_var)
                loss = criterion(output, target_var)
                _, pred = output.max(1)
                accuracy = (target_var.eq(pred)
                            ).data.float().sum().item() / input.size(0)
            val_loss.update(loss.data.item(), input.size(0))
            val_acc.update(accuracy, input.size(0))
            # Check accuracy
            post_progress(l=val_loss.avg, a=val_acc.avg)
        # Save checkpoint
        save_checkpoint({
            'iter': iter_num,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'accuracy': val_acc.avg,
            'loss': val_loss.avg,
        }, val_acc.avg > best['val_accuracy'])
        best['val_accuracy'] = max(val_acc.avg, best['val_accuracy'])
        print_progress('Iteration %d val accuracy %.2f' %
                       (iter_num, val_acc.avg * 100.0))

    # Here is our training loop.
    while iter_num < max_iter:
        for input, target in progress(train_loader):
            if iter_num % 1000 == 0:
                # Every 1000 iterations, halve the negative weights in the
                # layer4 conv filters, and report their statistics.
                neg_means = []
                pos_means = []
                neg_count = 0
                param_count = 0
                with torch.no_grad():
                    for name, param in model.named_parameters():
                        if all(n in name for n in ['layer4', 'conv', 'weight']):
                            pc = param.numel()
                            neg = (param < 0)
                            nc = neg.int().sum().item()
                            param_count += pc
                            neg_count += nc
                            if nc > 0:
                                neg_means.append(param[neg].mean().item())
                            if nc < pc:
                                pos_means.append(param[~neg].mean().item())
                            param[neg] *= 0.5
                print_progress('%d/%d neg, mean %e vs %e pos' %
                               (neg_count, param_count,
                                sum(neg_means) / len(neg_means),
                                sum(pos_means) / len(pos_means)))
            # Track the average training loss/accuracy for each epoch.
            train_loss, train_acc = AverageMeter(), AverageMeter()
            # Load data
            input_var, target_var = [d.cuda() for d in [input, target]]
            # Evaluate model
            output = model(input_var)
            loss = criterion(output, target_var)
            train_loss.update(loss.data.item(), input.size(0))
            # Perform one step of SGD
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Also check training set accuracy
            _, pred = output.max(1)
            accuracy = (target_var.eq(pred)).data.float().sum().item() / (
                input.size(0))
            train_acc.update(accuracy)
            remaining = 1 - iter_num / float(max_iter)
            post_progress(l=train_loss.avg, a=train_acc.avg,
                          v=best['val_accuracy'])
            # Advance
            iter_num += 1
            if iter_num >= max_iter:
                break
            # Linear learning rate decay
            lr = init_lr * remaining
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            # Occasionally check validation set accuracy and checkpoint
            if iter_num % 1000 == 0:
                validate_and_checkpoint()
                model.train()
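
# AverageMeter comes from this project's utility code and is not shown here.
# The class below is only a minimal sketch matching how it is used above:
# update(value, n) accumulates a weighted sum and exposes a running .avg,
# in the style of the standard PyTorch ImageNet example.

class AverageMeterSketch:
    """Tracks a weighted running average of a scalar metric."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count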