def val(epoch):
    """Run one validation pass, checkpoint on improvement, and decay the LR.

    Returns (val_loss, accuracy) where accuracy = 1 - val_err.
    Relies on module-level globals: model, val_loader, criterion,
    best_loss, LR, LR_DECAY, optimizer, DECAY_EVERY_N_EPOCHS.
    """
    global best_loss

    start = time.time()

    ### Test ###
    val_loss, val_err = train_utils.test(model, val_loader, criterion, epoch)
    print('Val - Loss: {:.4f} | Acc: {:.4f}'.format(val_loss, 1 - val_err))
    elapsed = time.time() - start
    print('Total Time {:.0f}m {:.0f}s\n'.format(elapsed // 60, elapsed % 60))

    ### Checkpoint ###
    # Persist weights only when validation loss improves on the best so far.
    if val_loss < best_loss:
        train_utils.save_weights(model, epoch, val_loss, val_err)
        best_loss = val_loss

    ### Adjust Lr ###
    train_utils.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch,
                                     DECAY_EVERY_N_EPOCHS)
    return val_loss, 1 - val_err
def main():
    """Train an ImageNet-pretrained classifier on the 4-class websites dataset.

    Reads hyper-parameters from CLI args, trains for args.epochs epochs,
    logs to <model_path>/log.txt, and checkpoints each epoch.
    """
    args = parse_arguments()

    # Reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.use_cuda:
        torch.cuda.manual_seed_all(args.seed)
        cudnn.benchmark = True

    model_path = get_model_path(args.dataset, args.arch, args.seed)

    # Init logger
    log_file_name = os.path.join(model_path, 'log.txt')
    print("Log file: {}".format(log_file_name))
    log = open(log_file_name, 'w')
    print_log('model path : {}'.format(model_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    for key, value in state.items():
        print_log("{} : {}".format(key, value), log)
    print_log("Random Seed: {}".format(args.seed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("Cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Data specifications for the websites dataset.
    # NOTE(review): mean/std of 0/1 means Normalize is a no-op here.
    mean = [0., 0., 0.]
    std = [1., 1., 1.]
    input_size = 224
    num_classes = 4

    # Dataset
    traindir = os.path.join(WEBSITES_DATASET_PATH, 'train')
    valdir = os.path.join(WEBSITES_DATASET_PATH, 'val')
    train_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)])
    test_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)])
    data_train = dset.ImageFolder(root=traindir, transform=train_transform)
    data_test = dset.ImageFolder(root=valdir, transform=test_transform)

    # Dataloader
    data_train_loader = torch.utils.data.DataLoader(data_train,
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    num_workers=args.workers,
                                                    pin_memory=True)
    data_test_loader = torch.utils.data.DataLoader(data_test,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True)

    # Network
    if args.arch == "vgg16":
        net = models.vgg16(pretrained=True)
    elif args.arch == "vgg19":
        net = models.vgg19(pretrained=True)
    elif args.arch == "resnet18":
        net = models.resnet18(pretrained=True)
    elif args.arch == "resnet50":
        net = models.resnet50(pretrained=True)
    elif args.arch == "resnet101":
        net = models.resnet101(pretrained=True)
    elif args.arch == "resnet152":
        net = models.resnet152(pretrained=True)
    else:
        raise ValueError("Network {} not supported".format(args.arch))

    # Replace the 1000-way ImageNet head with a num_classes-way head.
    if num_classes != 1000:
        net = manipulate_net_architecture(model_arch=args.arch, net=net,
                                          num_classes=num_classes)

    # Loss function
    if args.loss_function == "ce":
        criterion = torch.nn.CrossEntropyLoss()
    else:
        # FIX: previously a bare `raise ValueError` with no message.
        raise ValueError("Loss function {} not supported".format(args.loss_function))

    # Cuda
    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    # Optimizer
    momentum = 0.9
    decay = 5e-4
    optimizer = torch.optim.SGD(net.parameters(), lr=args.learning_rate,
                                momentum=momentum, weight_decay=decay,
                                nesterov=True)

    recorder = RecorderMeter(args.epochs)
    start_time = time.time()
    epoch_time = AverageMeter()

    # Main loop
    for epoch in range(args.epochs):
        current_learning_rate = adjust_learning_rate(args.learning_rate, momentum,
                                                     optimizer, epoch,
                                                     args.gammas, args.schedule)

        # ETA from the running-average epoch duration.
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        # train for one epoch
        # FIX: use_cuda was hard-coded to True, ignoring args.use_cuda
        # (validate() below already uses args.use_cuda).
        train_acc, train_los = train_model(data_loader=data_train_loader,
                                           model=net,
                                           criterion=criterion,
                                           optimizer=optimizer,
                                           epoch=epoch,
                                           log=log,
                                           print_freq=200,
                                           use_cuda=args.use_cuda)

        # evaluate on test set
        print_log("Validation on test dataset:", log)
        val_acc, val_loss = validate(data_test_loader, net, criterion,
                                     log=log, use_cuda=args.use_cuda)
        recorder.update(epoch, train_los, train_acc, val_loss, val_acc)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'args': copy.deepcopy(args),
            }, model_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(model_path, 'curve.png'))

    log.close()
# NOTE(review): fragment of a per-epoch training loop — `epoch`, `args`,
# `model`, `optimizer`, `scheduler`, `alphas`, `betas`, `biOptimizer`,
# `ngpus`, `dbsn`, `loaders` and `criterion` come from the enclosing scope.

# time_elapsed // 60, time_elapsed % 60))

### Checkpoint ###
# FIX: `epoch % args.save_freq is 0` compared by identity, not equality;
# `is` on int literals is implementation-dependent (SyntaxWarning on 3.8+).
if epoch % args.save_freq == 0 or epoch == args.epochs:
    logging.info('Saving model at Epoch: {}'.format(epoch))
    train_utils.save_checkpoint(dir=args.dir,
                                epoch=epoch,
                                state_dict=model.state_dict(),
                                optimizer=optimizer.state_dict(),
                                alphas=alphas,
                                betas=betas)

if args.optimizer == 'RMSProp':
    ### Adjust Lr ###
    if epoch < args.ft_start:
        scheduler.step(epoch=epoch)
    else:
        # scheduler.step(epoch=-1)  # reset to args.lr_init for fine-tuning
        train_utils.adjust_learning_rate(optimizer, args.ft_lr)
elif args.optimizer == 'SGD':
    lr = train_utils.schedule(epoch, args.lr_init, args.epochs)
    train_utils.adjust_learning_rate(optimizer, lr)

### Test set ###
test_loss, test_err, test_iou = train_utils.test(model, loaders['test'],
                                                 criterion, alphas, betas,
                                                 biOptimizer, ngpus, dbsn)
logging.info('Test - Loss: {:.4f} | Acc: {:.4f} | IOU: {:.4f}'.format(
    test_loss, 1 - test_err, test_iou))
# NOTE(review): fragment of a per-epoch training loop — `val_loss`,
# `val_err`, `epoch`, `since`, `writer`, `args`, `model`, `optimizer`,
# `loaders` and `criterion` come from the enclosing scope.

# Log validation metrics to TensorBoard.
writer.add_scalar("val/loss", val_loss, epoch)
writer.add_scalar("val/error", val_err, epoch)

time_elapsed = time.time() - since
print("Total Time {:.0f}m {:.0f}s\n".format(time_elapsed // 60, time_elapsed % 60))

### Checkpoint ###
if epoch % args.save_freq == 0:
    print("Saving model at Epoch: ", epoch)
    save_checkpoint(
        dir=args.dir,
        epoch=epoch,
        state_dict=model.state_dict(),
        optimizer=optimizer.state_dict(),
    )

# Step the manual LR schedule and record the new rate.
lr = schedule(
    epoch, args.lr_init, args.epochs
)
adjust_learning_rate(optimizer, lr)
writer.add_scalar("hypers/lr", lr, epoch)

### Test set ###
test_loss, test_err, test_iou = train_utils.test(model, loaders["test"], criterion)
print(
    "SGD Test - Loss: {:.4f} | Acc: {:.4f} | IOU: {:.4f}".format(
        test_loss, 1 - test_err, test_iou
    )
)
# NOTE(review): fragment of a training script — `args`, `mytransforms`,
# `train_loader`, `Dataset`, `FCDenseNet103`, `train_utils` and
# `DECAY_EVERY_N_EPOCHS` come from the enclosing scope/module.

# Validation data: paired raw-image / label-image directories.
val_dataset = Dataset(os.path.join(args.valdir, 'Image/'),
                      os.path.join(args.valdir, 'LabeledImage/'),
                      mytransforms)
# NOTE(review): shuffle=True on a validation loader is unusual — confirm intended.
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=1)

model = FCDenseNet103(args.classes)
#model=model.cuda()
model = nn.DataParallel(model).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
#optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=1e-4)
# NOTE(review): nn.NLLLoss2d is deprecated in modern PyTorch (nn.NLLLoss
# handles 2d targets) — kept as-is for the pinned torch version.
criterion = nn.NLLLoss2d()

# Warm-start from a previous checkpoint.
train_utils.load_weights(model, 'weights/weights-0-0.178-0.000.pth')

for epoch in range(args.epochs):
    print('\n _______________________________________________')
    trn_loss, trn_err = train_utils.train(model, train_loader, optimizer,
                                          criterion, epoch)
    print('Epoch {:d}\nTrain - Loss: {:.4f}'.format(epoch, trn_loss))
    # Checkpoint every epoch (the error slot is unused here, always 0).
    train_utils.save_weights(model, epoch, float(trn_loss), 0)
    ### Adjust Lr ###
    train_utils.adjust_learning_rate(args.lr, args.decay, optimizer, epoch,
                                     DECAY_EVERY_N_EPOCHS)
    ### Validate ###
    train_utils.view_sample_predictions(model, val_loader)
since = time.time() # Train trn_loss, trn_err = train(model, train_loader, optimizer, criterion, epoch) print("Epoch {:d}\nTrain - Loss: {:.4f}, Acc: {:.4f}".format( epoch, trn_loss, 1 - trn_err)) time_elapsed = time.time() - since print("Train Time {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60)) # Test val_loss, val_err = test(model, val_loader, criterion, epoch) print("Val - Loss: {:.4f} | Acc: {:.4f}".format(val_loss, 1 - val_err)) time_elapsed = time.time() - since print("Total Time {:.0f}m {:.0f}s\n".format(time_elapsed // 60, time_elapsed % 60)) if val_tmp < val_loss: # early stopping break else: val_tmp = val_loss # save results save_result(trn_loss, trn_err, val_loss, val_err, epoch) ### Adjust Lr ### train_utils.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS) ### Checkpoint ### train_utils.save_weights(model, epoch, val_loss, val_err, mode=mode)
def main():
    """Train/fine-tune a target model on the chosen pretrained dataset.

    Seeds RNGs from args, builds data loaders and the network, runs the
    epoch loop with per-epoch logging, checkpointing, and curve plotting.
    """
    args = parse_arguments()

    # Reproducibility.
    random.seed(args.pretrained_seed)
    torch.manual_seed(args.pretrained_seed)
    if args.use_cuda:
        torch.cuda.manual_seed_all(args.pretrained_seed)
        cudnn.benchmark = True

    # Destination directory for weights, curves and logs.
    model_path = get_model_path(dataset_name=args.pretrained_dataset,
                                network_arch=args.pretrained_arch,
                                random_seed=args.pretrained_seed)

    # Init logger
    log_file_name = os.path.join(model_path,
                                 'log_seed_{}.txt'.format(args.pretrained_seed))
    print("Log file: {}".format(log_file_name))
    log = open(log_file_name, 'w')
    print_log('save path : {}'.format(model_path), log)

    # Dump the full argument set into the log for the record.
    config = {k: v for k, v in args._get_kwargs()}
    for key, value in config.items():
        print_log("{} : {}".format(key, value), log)
    print_log("Random Seed: {}".format(args.pretrained_seed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("Cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Data specs and loaders for the pretrained dataset.
    num_classes, (mean, std), input_size, num_channels = get_data_specs(
        args.pretrained_dataset, args.pretrained_arch)
    pretrained_data_train, pretrained_data_test = get_data(
        args.pretrained_dataset,
        mean=mean,
        std=std,
        input_size=input_size,
        train_target_model=True)
    pretrained_data_train_loader = torch.utils.data.DataLoader(
        pretrained_data_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    pretrained_data_test_loader = torch.utils.data.DataLoader(
        pretrained_data_test,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    print_log("=> Creating model '{}'".format(args.pretrained_arch), log)

    # Init model, criterion, and optimizer
    net = get_network(args.pretrained_arch,
                      input_size=input_size,
                      num_classes=num_classes,
                      finetune=args.finetune)
    print_log("=> Network :\n {}".format(net), log)
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # Parameter-count summary.
    frozen_count = get_num_non_trainable_parameters(net)
    trainable_count = get_num_trainable_parameters(net)
    total_count = get_num_parameters(net)
    print_log("Trainable parameters: {}".format(trainable_count), log)
    print_log("Non Trainable parameters: {}".format(frozen_count), log)
    print_log("Total # parameters: {}".format(total_count), log)

    # define loss function (criterion) and optimizer
    criterion_xent = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(),
                                config['learning_rate'],
                                momentum=config['momentum'],
                                weight_decay=config['decay'],
                                nesterov=True)

    if args.use_cuda:
        net.cuda()
        criterion_xent.cuda()

    recorder = RecorderMeter(args.epochs)

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.epochs):
        current_learning_rate = adjust_learning_rate(args.learning_rate,
                                                     args.momentum,
                                                     optimizer,
                                                     epoch,
                                                     args.gammas,
                                                     args.schedule)

        # ETA estimated from the running-average epoch duration.
        remaining = epoch_time.avg * (args.epochs - epoch)
        need_hour, need_mins, need_secs = convert_secs2time(remaining)
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins,
                                                          need_secs)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate)
                  + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train_target_model(pretrained_data_train_loader,
                                                  net,
                                                  criterion_xent,
                                                  optimizer,
                                                  epoch,
                                                  log,
                                                  print_freq=args.print_freq,
                                                  use_cuda=args.use_cuda)

        # evaluate on validation set
        print_log("Validation on pretrained test dataset:", log)
        val_acc = validate(pretrained_data_test_loader, net, criterion_xent, log,
                           use_cuda=args.use_cuda)

        # No validation loss is tracked in this script, hence the 0. slot.
        recorder.update(epoch, train_los, train_acc, 0., val_acc)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pretrained_arch,
            'state_dict': net.state_dict(),
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
            'args': copy.deepcopy(args),
        }, model_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(model_path, 'curve.png'))

    log.close()