def main():
    """Train/evaluate the ITrackerModel gaze tracker, resuming from a checkpoint
    when available.

    Relies on module-level configuration (args, doLoad, doTest, batch_size,
    workers, lr, epochs, best_prec1, weight_decay, momentum) and project helpers
    (ITrackerModel, ITrackerData, load_checkpoint, save_checkpoint, train,
    validate, adjust_learning_rate, try_cuda, get_checkpoint_path).
    """
    global args, best_prec1, weight_decay, momentum
    # Only load ImageNet-pretrained weights when no Inception checkpoint exists locally.
    inception_exists = os.path.isfile(get_checkpoint_path(INCEPTION_FILENAME))
    model = ITrackerModel(use_pretrained=not inception_exists)
    if use_cuda:
        model = torch.nn.DataParallel(model)
    model = try_cuda(model)
    imSize = (224, 224)
    cudnn.benchmark = True
    epoch = 0
    minibatch = 0
    if doLoad:
        saved = load_checkpoint()
        if saved:
            print('Loading checkpoint for epoch %05d with error %.5f...' %
                  (saved['epoch'], saved['best_prec1']))
            state_dict = model.state_dict()
            state = saved['state_dict']
            # Checkpoints may or may not have been saved from a DataParallel
            # wrapper; normalize the 'module.' key prefix to match this model.
            if isinstance(model, torch.nn.DataParallel):
                temp_state = OrderedDict()
                for k, v in state.items():
                    if not k.startswith('module.'):
                        k = 'module.' + k
                    temp_state[k] = v
                state = temp_state
            else:
                temp_state = OrderedDict()
                for k, v in state.items():
                    if k.startswith('module.'):
                        k = k[len('module.'):]
                    temp_state[k] = v
                state = temp_state
            if not os.path.isfile(get_checkpoint_path(INCEPTION_FILENAME)):
                # Delete the connected layers if not the Inception file because
                # the modified network does not have these.
                if 'eyesFC.0.weight' in state:
                    del state['eyesFC.0.weight']
                if 'module.eyesFC.0.weight' in state:
                    del state['module.eyesFC.0.weight']
                if 'eyesFC.0.bias' in state:
                    del state['eyesFC.0.bias']
                if 'module.eyesFC.0.bias' in state:
                    del state['module.eyesFC.0.bias']
            state_dict.update(state)
            try:
                model.module.load_state_dict(state_dict)
            except Exception:  # fix: bare except also caught KeyboardInterrupt/SystemExit
                # Model is not wrapped in DataParallel (no .module attribute).
                model.load_state_dict(state_dict)
            epoch = saved['epoch']
            best_prec1 = saved['best_prec1']
            if 'minibatch' in saved:
                minibatch = saved['minibatch']
        else:
            print('Warning: Could not read checkpoint!')
    dataTrain = ITrackerData(split='train', imSize=imSize)
    dataVal = ITrackerData(split='val', imSize=imSize)  # Gotta mess with the train.
    train_loader = torch.utils.data.DataLoader(
        dataTrain, batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        dataVal, batch_size=batch_size, shuffle=False,
        num_workers=workers, pin_memory=True)
    criterion = try_cuda(nn.MSELoss())
    # Only optimize parameters that are not frozen.
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()), lr,
        momentum=momentum, weight_decay=weight_decay)
    # Quick test
    if doTest:
        validate(val_loader, model, criterion, epoch)
        return
    # There's a bug here which causes epoch = epoch - 1. However, the checkpoint is already
    # saved with the higher number epoch, so we'll just always add 1 when saving the epoch instead.
    # (The replay loop below leaves `epoch` at its last value, one less than the resume epoch.)
    for epoch in range(0, epoch):
        adjust_learning_rate(optimizer, epoch)
    for epoch in range(epoch, epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, minibatch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        # remember best prec@1, set minibatch to zero, and save checkpoint
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        minibatch = 0
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)
def main():
    """Train/evaluate the MobileGaze model with Adam, periodic checkpointing,
    and TensorBoard logging.

    Relies on module-level configuration (args, doLoad, doTest, bn_momentum,
    batch_size, val_batch, workers, base_lr, adam_betas, epochs, criterion_name,
    saved_model_path, writer) and project helpers (MobileGaze, ITrackerData,
    load_checkpoint, save_checkpoint, train_epoch, validate,
    adjust_learning_rate, AverageMeter).
    """
    # global args, best_prec1, weight_decay, momentum
    model = MobileGaze(bn_momentum).cuda()
    for name, value in model.named_parameters():
        print("({}, {})".format(name, value.requires_grad))
    print("Total number of model parameters: ",
          sum(p.numel() for p in model.parameters()))
    print("Number of trainable parameters: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = torch.nn.DataParallel(model)
    imSize = (224, 224)
    cudnn.benchmark = True
    epoch = 0
    # Sentinel "worst" error so any real validation result becomes the best.
    best_prec1 = int(1e20)
    if doLoad:
        saved = load_checkpoint(saved_model_path)
        if saved:
            print(
                'Loading checkpoint for epoch %05d with l2 error %.5f (which is the actual linear error)...'
                % (saved['epoch'], saved['best_prec1']))
            state = saved['state_dict']
            try:
                model.module.load_state_dict(state)
            except Exception:  # fix: bare except also caught KeyboardInterrupt/SystemExit
                # Model is not wrapped in DataParallel (no .module attribute).
                model.load_state_dict(state)
            epoch = saved['epoch'] + 1
            # if 'temp' in saved_model_path: epoch -= 1  # Model loaded from emergency ckpt
            best_prec1 = saved['best_prec1']
        else:
            print('Warning: Could not read checkpoint!')
    else:
        print("Training model from scratch.")
    dataTrain = ITrackerData(dataPath=args.data_path, split='train', imSize=imSize)
    dataVal = ITrackerData(dataPath=args.data_path, split='val', imSize=imSize)
    dataTest = ITrackerData(dataPath=args.data_path, split='test', imSize=imSize)
    val_loader = torch.utils.data.DataLoader(
        dataVal, batch_size=val_batch, shuffle=True,
        num_workers=workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        dataTest, batch_size=val_batch, shuffle=False,
        num_workers=workers, pin_memory=True)
    train_loader = torch.utils.data.DataLoader(
        dataTrain, batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
    print("Size of training, validation and test sets: {}, {}, {}".format(
        batch_size * len(train_loader), val_batch * len(val_loader),
        val_batch * len(test_loader)))
    criterion = nn.SmoothL1Loss().cuda(
    ) if criterion_name == 'huber' else nn.MSELoss().cuda()
    print("Criterion used: {}".format(criterion_name))
    # Test mode
    if doTest:
        test_mean = validate(test_loader, model, criterion, epoch)
        print(test_mean)
        return
    optimizer = torch.optim.Adam(model.parameters(), base_lr, betas=adam_betas)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    l2_errors = AverageMeter()
    loss_list = []
    start_epoch = epoch
    # fix: `lr` is used in np.save() below; define it here so a resumed run
    # with start_epoch >= epochs (zero loop iterations) does not raise
    # UnboundLocalError.
    lr = base_lr
    for epoch in range(start_epoch, epochs):
        lr = adjust_learning_rate(optimizer, epoch, base_lr)
        print("Learning rate for epoch{}: {}".format(epoch, lr))
        # train for one epoch; on Ctrl-C fall through so the partial epoch is
        # still checkpointed below.
        try:
            train_epoch(model, train_loader, optimizer, criterion, epoch,
                        loss_list, batch_time, data_time, losses, l2_errors)
        except KeyboardInterrupt:
            pass
        save_name = 'checkpoint_ep{}_bn_{}_{}.pth.tar'.format(
            epoch, bn_momentum, criterion_name)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, False, save_name)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        # remember best prec@1 and save checkpoint
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_name = 'checkpoint_ep{}_bn_{}_{}.pth.tar'.format(
            epoch, bn_momentum, criterion_name)
        if is_best:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, True, save_name)
    np.save("./losses_lr{}_ep{}.npy".format(lr, epochs), np.array(loss_list))
    # export scalar data to JSON for external processing
    # writer.export_scalars_to_json("./all_scalars.json")
    writer.add_scalar('bn_momentum', bn_momentum, 0)
    writer.close()
def main():
    """Train/evaluate the ITrackerModel with SGD on the full train/val/test splits.

    Relies on module-level configuration (doLoad, doTest, batch_size, workers,
    lr, epochs, best_prec1, weight_decay, momentum) and project helpers
    (ITrackerModel, ITrackerData, load_checkpoint, save_checkpoint, train,
    validate, adjust_learning_rate).
    """
    global args, best_prec1, weight_decay, momentum
    model = ITrackerModel()
    model = torch.nn.DataParallel(model)
    model.cuda()
    imSize = (224, 224)
    cudnn.benchmark = True
    epoch = 0
    if doLoad:
        saved = load_checkpoint()
        if saved:
            print(
                'Loading checkpoint for epoch %05d with loss %.5f (which is L2 = mean of squares)...'
                % (saved['epoch'], saved['best_prec1']))
            state = saved['state_dict']
            try:
                model.module.load_state_dict(state)
            except Exception:  # fix: bare except also caught KeyboardInterrupt/SystemExit
                # Model is not wrapped in DataParallel (no .module attribute).
                model.load_state_dict(state)
            epoch = saved['epoch']
            best_prec1 = saved['best_prec1']
        else:
            print('Warning: Could not read checkpoint!')
    dataTrain = ITrackerData(split='train', imSize=imSize)
    dataVal = ITrackerData(split='val', imSize=imSize)
    dataTest = ITrackerData(split='test', imSize=imSize)
    train_loader = torch.utils.data.DataLoader(
        dataTrain, batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        dataVal, batch_size=batch_size, shuffle=False,
        num_workers=workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        dataTest, batch_size=batch_size, shuffle=False,
        num_workers=workers, pin_memory=True)
    criterion = nn.MSELoss().cuda()  # mean square error
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum, weight_decay=weight_decay)
    # Quick test
    if doTest:
        validate(test_loader, model, criterion, epoch)
        return
    # Replay the LR schedule up to the resume epoch; note this leaves `epoch`
    # one below the resume value, compensated by saving epoch + 1 below.
    for epoch in range(0, epoch):
        adjust_learning_rate(optimizer, epoch)
    for epoch in range(epoch, epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        # remember best prec@1 and save checkpoint
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
def main():
    """Train/evaluate the ITrackerModel, printing a parameter-count summary and
    validating against the test split.

    Relies on module-level configuration (args, doLoad, doTest, batch_size,
    workers, lr, epochs, best_prec1, weight_decay, momentum) and project helpers
    (ITrackerModel, ITrackerData, load_checkpoint, save_checkpoint, train,
    validate, adjust_learning_rate).
    """
    global args, best_prec1, weight_decay, momentum
    model = ITrackerModel()
    print("Total number of parameters: ",
          sum(p.numel() for p in model.parameters()))
    #print("Number of trainable parameters: ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = nn.DataParallel(model)
    model.cuda()
    imSize = (224, 224)
    cudnn.benchmark = True
    epoch = 0
    if doLoad:
        saved = load_checkpoint()
        if saved:
            print(
                'Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...'
                % (saved['epoch'], saved['best_prec1']))
            state = saved['state_dict']
            try:
                model.module.load_state_dict(state)
            except Exception:  # fix: bare except also caught KeyboardInterrupt/SystemExit
                # Model is not wrapped in DataParallel (no .module attribute).
                model.load_state_dict(state)
            epoch = saved['epoch']
            best_prec1 = saved['best_prec1']
        else:
            print('Warning: Could not read checkpoint!')
    dataTrain = ITrackerData(dataPath=args.data_path, split='train', imSize=imSize)
    # NOTE(review): validation here reads the 'test' split — presumably
    # intentional for this script's evaluation protocol; confirm.
    dataVal = ITrackerData(dataPath=args.data_path, split='test', imSize=imSize)
    # data = dataVal.__getitem__(0)
    # for idx in range(len(data)):
    #     np.squeeze(data[idx])
    # model_stats = get_model_stats(model, data)
    train_loader = torch.utils.data.DataLoader(
        dataTrain, batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        dataVal, batch_size=batch_size, shuffle=False,
        num_workers=workers, pin_memory=True)
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum, weight_decay=weight_decay)
    # Quick test
    if doTest:
        val_mean = validate(val_loader, model, criterion, epoch)
        print(val_mean)
        return
    # Replay the LR schedule up to the resume epoch; note this leaves `epoch`
    # one below the resume value, compensated by saving epoch + 1 below.
    for epoch in range(0, epoch):
        adjust_learning_rate(optimizer, epoch)
    for epoch in range(epoch, epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        # remember best prec@1 and save checkpoint
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)