Example #1
0
def run_train(opt, training_data_loader, validation_data_loader):
    """Train a VGG model with MSE loss, logging per-epoch metrics to CSV.

    Args:
        opt: option namespace; reads/writes attributes such as
            checkpoint_dir, resume, start_epoch, n_epochs, lr, b1, b2,
            use_cuda, multi_gpu, device, epoch_num.
        training_data_loader: iterable yielding training batches.
        validation_data_loader: iterable yielding validation batches.

    Side effects: creates ``opt.checkpoint_dir`` if missing, writes
    ``vgg_log.csv`` inside it, and calls ``save_checkpoint`` every epoch.
    """
    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    log_file = os.path.join(opt.checkpoint_dir, 'vgg_log.csv')

    print('[Initialize networks for training]')

    net = VGG(opt)
    L2_criterion = nn.MSELoss()
    print(net)

    if opt.resume:
        # Resuming: keep the existing log file and continue appending to it.
        opt.start_epoch, net = load_model(opt, opt.checkpoint_dir)
    else:
        # Fresh run: (re)write the CSV header. Plain commas only, so the
        # header matches the data rows and the file parses as valid CSV
        # (the original mixed ", " and "," separators).
        with open(log_file, mode='w') as f:
            f.write('epoch,train_loss,train_acc,valid_loss,valid_acc\n')

    print('===> Setting GPU')
    print('CUDA Available', torch.cuda.is_available())

    # Honor the CUDA request only when CUDA is actually available;
    # fall back to CPU otherwise.
    opt.use_cuda = bool(opt.use_cuda and torch.cuda.is_available())
    opt.device = 'cuda' if opt.use_cuda else 'cpu'

    if torch.cuda.device_count() > 1 and opt.multi_gpu:
        # Fixed message: the original printed e.g. "Use2GPUs" with no spaces.
        print('Use %d GPUs' % torch.cuda.device_count())
        net = nn.DataParallel(net)

    if opt.use_cuda:
        net = net.to(opt.device)
        L2_criterion = L2_criterion.to(opt.device)

    print("===> Setting Optimizer")
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opt.lr,
                                 betas=(opt.b1, opt.b2))

    for epoch in range(opt.start_epoch, opt.n_epochs):
        opt.epoch_num = epoch
        train_loss, train_acc = train(opt,
                                      net,
                                      optimizer,
                                      training_data_loader,
                                      loss_criterion=L2_criterion)
        valid_loss, valid_acc = evaluate(opt,
                                         net,
                                         validation_data_loader,
                                         loss_criterion=L2_criterion)

        # Append one CSV row per epoch. %.8f replaces the original %08f,
        # which meant "min width 8, zero-padded" (still 6 decimals) and was
        # almost certainly not the intended precision.
        with open(log_file, mode='a') as f:
            f.write("%d,%.8f,%.8f,%.8f,%.8f\n" %
                    (epoch, train_loss, train_acc, valid_loss, valid_acc))
        save_checkpoint(opt, net, epoch, valid_loss)
Example #2
0
    # NOTE(review): this fragment is the tail of an if/elif chain selecting
    # the training variant ("red" vs "blue"); the opening `if` is outside
    # this view. Names like init_lr, momentum, weight_decay, nesterov,
    # model, params, train_dataloader, val_dataloader, batch_size and
    # num_workers come from the enclosing scope.

    # SGD restricted to the trainable parameters only; hyperparameters are
    # taken from the surrounding scope.
    params.optimizer = torch.optim.SGD(trainable_vars,
                                       lr=init_lr,
                                       momentum=momentum,
                                       weight_decay=weight_decay,
                                       nesterov=nesterov)

    # Train
    # Decay the LR by `lr_decay` when the monitored quantity ('min' mode)
    # stops improving for 10 epochs; wait 10 more before monitoring again.
    params.lr_scheduler = ReduceLROnPlateau(params.optimizer,
                                            'min',
                                            factor=lr_decay,
                                            patience=10,
                                            cooldown=10,
                                            verbose=True)
    # NOTE(review): this rebinding shadows the `trainer` module with the
    # trainer instance — works, but is confusing; consider a distinct name.
    trainer = trainer.RedTrainer(model, params, train_dataloader,
                                 val_dataloader)
    trainer.train()

elif train == "blue":
    # setting loss function
    # NOTE(review): `reduce`/`size_average` are deprecated MSELoss kwargs
    # (modern PyTorch uses reduction='mean'); left unchanged here.
    params.criterion = nn.MSELoss(reduce=True, size_average=True)

    # load data
    print("Loading dataset...")
    dataset = DataReader()

    # Scale the batch size by the number of GPUs listed in params.gpus
    # (data parallelism); keep the configured size when the list is empty.
    batch_size = batch_size if len(
        params.gpus) == 0 else batch_size * len(params.gpus)
    train_dataloader = DataLoader(dataset.get_training_set(),
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
Example #3
0
    # NOTE(review): body of a training driver whose enclosing def is outside
    # this view; model, criterion, optimizer, device, logger, h_params,
    # begin_epoch, train_batch_num and the dataset objects all come from
    # that scope.
    train_begin = time.time()  # wall-clock start, forwarded to train()


    for epoch in range(begin_epoch, h_params.max_epochs):
        # Bounded queue feeding batches from the loader threads to train().
        train_queue = queue.Queue(h_params.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue, h_params.batch_size, h_params.workers)
        train_loader.start()

        # NOTE(review): hard-coded schedule change at epoch 25 — swaps in a
        # fresh Adam optimizer with a lower LR and raises teacher forcing.
        # `model.module` implies the model is wrapped (e.g. DataParallel) —
        # TODO confirm.
        if epoch == 25:
            optimizer = optim.Adam(model.module.parameters(), lr = 0.00005 )
            h_params.teacher_forcing = 0.99

        train_loss, train_cer = train(model, train_batch_num,
                                      train_queue, criterion,
                                      optimizer, device,
                                      train_begin, h_params.workers,
                                      10, h_params.teacher_forcing)

        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))

        # Wait for the loader threads to drain before validating.
        train_loader.join()

        # Single-threaded loader (last arg 0) for validation.
        valid_queue = queue.Queue(h_params.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, h_params.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        valid_loader.join()
Example #4
0
def main():
    """Adversarially train ResNet-18 on CIFAR-10 (PGD), checkpointing each epoch.

    Loads a YAML config, optionally resumes from a checkpoint, runs PGD
    adversarial training, and evaluates on both clean and adversarial test
    data every epoch. Checkpoints are written to the configured ckpt_dir.
    """
    # Setup.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="./config/example.yaml")
    parser.add_argument("--gpu", default="0", type=str)
    # Path to checkpoint (empty string means the latest checkpoint)
    # or False (means training from scratch).
    parser.add_argument("--resume", default="", type=str)
    args = parser.parse_args()
    config, inner_dir, config_name = load_config(args.config)
    saved_dir = get_saved_dir(config, inner_dir, config_name, args.resume)
    storage_dir, ckpt_dir = get_storage_dir(config, inner_dir, config_name,
                                            args.resume)
    logger = get_logger(saved_dir, "adv_training.log", args.resume)

    # Prepare data: standard CIFAR-10 augmentation for training, none for test.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    test_transform = transforms.Compose([transforms.ToTensor()])
    train_data = cifar.CIFAR10(root=config["dataset_dir"],
                               transform=train_transform)
    test_data = cifar.CIFAR10(root=config["dataset_dir"],
                              train=False,
                              transform=test_transform)
    train_loader = DataLoader(train_data,
                              batch_size=config["batch_size"],
                              shuffle=True,
                              num_workers=4)
    test_loader = DataLoader(test_data,
                             batch_size=config["batch_size"],
                             num_workers=4)

    # Resume training state. resume_state() operates on the *bare*
    # classifier, before any wrapping below.
    model = resnet_cifar.ResNet18()
    gpu = int(args.gpu)
    logger.info("Set GPU to {}".format(args.gpu))
    model = model.cuda(gpu)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                **config["optimizer"]["SGD"])
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, **config["lr_scheduler"]["MultiStepLR"])
    resumed_epoch = resume_state(model, optimizer, args.resume, ckpt_dir,
                                 scheduler)

    # Set attack first and then add a normalized layer.
    pgd_config = {}
    for k, v in config["pgd_attack"].items():
        if k in ("eps", "alpha"):
            # SECURITY NOTE: eval() executes arbitrary config text (intended
            # for fractions like "8/255"); only run with trusted configs.
            pgd_config[k] = eval(v)
        else:
            pgd_config[k] = v
    attacker = PGD(model, **pgd_config)
    normalize_net = NormalizeByChannelMeanStd((0.4914, 0.4822, 0.4465),
                                              (0.2023, 0.1994, 0.2010))
    normalize_net.cuda(gpu)
    # BUGFIX: keep a handle on the bare classifier. Checkpoints must store
    # its state dict — the Sequential wrapper prefixes every key with
    # "0."/"1.", and resume_state() above loads into the bare model, so
    # saving the wrapped state dict made checkpoints un-resumable.
    base_model = model
    model = nn.Sequential(normalize_net, model)

    for epoch in range(config["num_epochs"] - resumed_epoch):
        logger.info("===Epoch: {}/{}===".format(epoch + resumed_epoch + 1,
                                                config["num_epochs"]))
        logger.info("Adversarial training...")
        adv_train_result = train(model,
                                 train_loader,
                                 criterion,
                                 optimizer,
                                 logger,
                                 attacker=attacker)
        if scheduler is not None:
            scheduler.step()
            logger.info("Adjust learning rate to {}".format(
                optimizer.param_groups[0]["lr"]))
        logger.info("Test model on clean data...")
        clean_test_result = test(model, test_loader, criterion, logger)
        logger.info("Test model on adversarial data...")
        adv_test_result = test(model,
                               test_loader,
                               criterion,
                               logger,
                               attacker=attacker)
        result = {
            "adv_train": adv_train_result,
            "clean_test": clean_test_result,
            "adv_test": adv_test_result,
        }

        # Save checkpoint (bare-classifier weights, so resume_state works).
        saved_dict = {
            "epoch": epoch + resumed_epoch + 1,
            "result": result,
            "model_state_dict": base_model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        if scheduler is not None:
            saved_dict["scheduler_state_dict"] = scheduler.state_dict()
        torch.save(
            saved_dict,
            os.path.join(ckpt_dir,
                         "epoch{}.pt".format(epoch + resumed_epoch + 1)),
        )
Example #5
0
# Per-epoch metric history: four training series plus per-domain (A-D)
# validation accuracy and loss series, each starting as an empty list.
# (Per-domain val_Lcls/val_Ldsne series existed once but are not logged.)
results_log = {key: [] for key in (
    ['train_acc', 'train_loss', 'train_Lcls', 'train_Ldsne']
    + ['val_%s_%s' % (metric, domain)
       for metric in ('acc', 'loss') for domain in 'ABCD']
)}

# train/val loop
# NOTE(review): this fragment looks like two spliced scripts — the
# phase-based train/val loop below, and a second setup (loss selection +
# train/test) further down that reads `args`, `model`, `optim_model`,
# `scheduler`, `early`, `gpu` etc. from an outer scope. The final print
# statement is cut off mid-expression at the end of this view.
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-------------')

    for phase in ['train', 'val']:
        # train
        if phase == 'train':
            results_train = train(train_steps, extractor, classifier, loader_train, optimizer, train_criterion, device)
            results_log = log_dict(results_train, results_log, phase, None)
        # val
        else:
            # Validate on each of the four domain loaders (A-D) and record
            # the metrics under the corresponding per-domain log keys.
            results_A_val = val(val_steps, extractor, classifier, loader_a_val, val_criterion, device)
            results_log = log_dict(results_A_val, results_log, phase, 'A')

            results_B_val = val(val_steps, extractor, classifier, loader_b_val, val_criterion, device)
            results_log = log_dict(results_B_val, results_log, phase, 'B')

            results_C_val = val(val_steps, extractor, classifier, loader_c_val, val_criterion, device)
            results_log = log_dict(results_C_val, results_log, phase, 'C')

            results_D_val = val(val_steps, extractor, classifier, loader_d_val, val_criterion, device)
            results_log = log_dict(results_D_val, results_log, phase, 'D')

    # NOTE(review): batch_size=len(test_set) evaluates the whole test set
    # in a single batch.
    test_loader = DataLoader(test_set, shuffle=False, batch_size=len(test_set))

    # Loss
    # Select the classification loss from args.loss.
    # NOTE(review): there is no else branch — an unrecognized args.loss
    # would leave loss_cls undefined (NameError at the train() call).
    if args.loss.lower() == 'ce':
        loss_cls = nn.CrossEntropyLoss(weight=per_cls_weights).cuda()
    elif args.loss.lower() == 'ldam':
        loss_cls = LDAMLoss(cls_num_list=cls_num_list,
                            max_m=0.5,
                            s=30,
                            weight=per_cls_weights).cuda()
    elif args.loss.lower() == 'focal':
        loss_cls = FocalLoss(weight=per_cls_weights, gamma=1).cuda()

    # TODO: Modifying the train and evaluation functions
    # Training
    model = train(epoch, model, optim_model, loss_cls, loss_reg, train_loader,
                  gpu, args)

    if args.LR_schedule == True:
        scheduler.step()

    # Save the stage0 model
    # Evaluate (and possibly early-stop) only every args.print_epoch epochs.
    if epoch % args.print_epoch == 0:
        result = test(epoch, model, loss_cls, test_loader, gpu, args)

        if args.earlystop == True:
            early(result['loss'], model, result)
            if early.early_stop == True:
                break

        if args.print_test == True:
            print('Epoch : %d, Test Acc : %2.2f, Test Loss : %.2f' %
Example #7
0
    # NOTE(review): one iteration body of an (unseen) epoch loop; epoch,
    # net, the loaders, criteria, optimizer2, hp_lambda, gamma, best_acc
    # and output_dir all come from the enclosing scope.
    if epoch == 0:
        # First epoch: skip training and just record baseline metrics —
        # training stats are zeroed, then val and test run once.
        train_loss, train_cls_loss, train_dom_loss, train_cls_acc, train_dom_acc = 0, 0, 0, 0, 0

        val_loss, val_cls_loss, val_dom_loss, val_cls_acc, val_dom_acc = val(
            epoch, num_epochs, num_steps_val, net, loader_abc_test,
            cls_criterion, dom_criterion, device, hp_lambda, gamma)

        test_d_loss, test_d_acc = test(net, loader_d_test, cls_criterion,
                                       device)

    else:
        # Subsequent epochs: run train, then val, then test in order.
        for phase in ['train', 'val', 'test']:
            if phase == 'train':
                train_loss, train_cls_loss, train_dom_loss, train_cls_acc, train_dom_acc = train(
                    epoch, num_epochs, num_steps, net, loader_abc_train,
                    cls_criterion, dom_criterion, device, optimizer2,
                    hp_lambda, gamma)
            elif phase == 'val':
                val_loss, val_cls_loss, val_dom_loss, val_cls_acc, val_dom_acc = val(
                    epoch, num_epochs, num_steps_val, net, loader_abc_test,
                    cls_criterion, dom_criterion, device, hp_lambda, gamma)
            else:
                test_d_loss, test_d_acc = test(net, loader_d_test,
                                               cls_criterion, device)
                print('test D loss: {:.3f}, acc: {:.3f}'.format(
                    test_d_loss, test_d_acc))
                # Persist the best model (by test-D accuracy) seen so far.
                # NOTE(review): best model is only tracked in epochs > 0;
                # the epoch-0 baseline test result is never compared.
                if test_d_acc > best_acc:
                    torch.save(net.state_dict(),
                               os.path.join(output_dir, 'model_best_acc.pth'))
                    best_acc = test_d_acc