import os

import torch
import torch.nn as nn
import torch.optim as optim

# Project-local names (get_dataloaders, get_model, num_class, Augment, utils,
# train_epoch, train_epoch_two_bns, validate_epoch) are assumed to be imported
# from the surrounding repository.


def train_and_validate(config):

    # data loaders
    trainloader, testloader = get_dataloaders(config)

    # model
    # if config.bn_num == 1:
    #     target_net = get_model(config, num_class(config.dataset))
    if config.bn_num == 2:
        target_net = get_model(config,
                               num_class(config.dataset),
                               bn_types=['base', 'deform'])
    else:
        raise ValueError('invalid bn_num: {}'.format(config.bn_num))

    # deform_vae
    if config.deform_vae == 'deform_conv_cifar_v1':
        from models.deform_vae_cifar import VAE
        aug_net = VAE(config.z_dim_deform, config.fea_dim_deform)
        aug_net = nn.DataParallel(aug_net).cuda()
    else:
        raise ValueError('invalid deform_vae: {}'.format(config.deform_vae))

    model = Augment(target_net=target_net, aug_net=aug_net, config=config)
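    # NOTE: Augment is assumed to bundle the target network, the VAE-based
    # augmenter, and the target-net optimizer (model.target_net_optim is read
    # below); this is an inference from usage, not from the Augment source.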

    start_epoch = 0
    best_test_acc = 0.0
    test_acc = 0.0
    if config.resume:
        best_test_acc, test_acc, start_epoch = \
            utils.load_checkpoint(config, model.target_net, model.target_net_optim)

    print('trainloader length: {}'.format(len(trainloader)))
    print('testloader length: {}'.format(len(testloader)))

    exp_dir = utils.get_log_dir_path(config.exp_dir, config.exp_id)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    print('exp_dir: {}'.format(exp_dir))
    log_file = os.path.join(exp_dir, 'log.txt')
    names = ['epoch', 'lr', 'Train Acc', 'Test Acc', 'Best Test Acc']
    with open(log_file, 'a') as f:
        f.write('batch size: {}\n'.format(config.batch_size))
        f.write('lr: {}\n'.format(config.lr))
        f.write('momentum: {}\n'.format(config.momentum))
        f.write('weight_decay: {}\n'.format(config.weight_decay))
        for per_name in names:
            f.write(per_name + '\t')
        f.write('\n')
    print('target net grad clip: {}'.format(config.grad_clip))
    for epoch in range(start_epoch, config.epochs):
        lr = model.target_net_optim.param_groups[0]['lr']
        print('lr: {}'.format(lr))
        # train for one epoch
        train_acc = train_epoch_two_bns(trainloader, model, epoch, config)
        # evaluate on test set
        test_acc = validate_epoch(testloader, model, config)
        # remember the best accuracy and save a checkpoint
        is_best = test_acc > best_test_acc
        if is_best:
            best_test_acc = test_acc

        utils.save_checkpoint(
            model, {
                'epoch': epoch + 1,
                'state_dict': model.target_net.state_dict(),
                'test_acc': test_acc,
                'optimizer': model.target_net_optim.state_dict(),
            }, is_best, exp_dir)

        values = [train_acc, test_acc, best_test_acc]
        with open(log_file, 'a') as f:
            f.write('{:d}\t'.format(epoch))
            f.write('{:g}\t'.format(lr))
            for per_value in values:
                f.write('{:2.2f}\t'.format(per_value))
            f.write('\n')
        print('exp_dir: {}'.format(exp_dir))
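
# A minimal usage sketch for the two-BN variant above. The config fields and
# their values are assumptions inferred from the attribute reads in the
# function body, not the repository's actual defaults.
from types import SimpleNamespace

example_config = SimpleNamespace(
    dataset='cifar10', batch_size=128, lr=0.1, momentum=0.9,
    weight_decay=5e-4, epochs=200, grad_clip=5.0, resume=False,
    bn_num=2, deform_vae='deform_conv_cifar_v1',
    z_dim_deform=32, fea_dim_deform=64,
    exp_dir='./experiments', exp_id='deform_aug')
# train_and_validate(example_config)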

Example #2

def train_and_validate(config):

    # data loaders
    trainloader, testloader = get_dataloaders(config)

    # model
    model = get_model(config, num_class(config.dataset))

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # optimizer
    # if config.decay_type is None:
    #     params = model.parameters()
    # elif config.decay_type == 'no_bn':
    #     params = utils.add_weight_decay(model, config.weight_decay)
    # else:
    #     raise Exception('unknown decay type: {}'.format(config.decay_type))
    optimizer = optim.SGD(model.parameters(),
                          config.lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay,
                          nesterov=True)
    # lr scheduler
    if config.lr_scheduler == 'cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=float(config.epochs), eta_min=0.)
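        # CosineAnnealingLR follows
        #   eta_t = eta_min + (eta_max - eta_min) * (1 + cos(pi * t / T_max)) / 2,
        # so the lr decays from config.lr toward eta_min=0 over T_max steps.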
    else:
        raise ValueError('invalid lr_scheduler: {}'.format(config.lr_scheduler))

    # if config.warmup_epoch > 0:
    #     print('using lr warmup scheduler...')
    #     lr_scheduler = GradualWarmupScheduler(
    #         optimizer,
    #         multiplier=config.warmup_multiplier,
    #         total_epoch=config.warmup_epoch,
    #         after_scheduler=lr_scheduler
    #     )

    start_epoch = 0
    best_test_acc = 0.0
    test_acc = 0.0
    if config.resume:
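        # NOTE: load_checkpoint restores model/optimizer state, but the
        # lr_scheduler is not advanced to start_epoch here.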
        best_test_acc, test_acc, start_epoch = \
            utils.load_checkpoint(config, model, optimizer)

    print('trainloader length: {}'.format(len(trainloader)))
    print('testloader length: {}'.format(len(testloader)))

    exp_dir = utils.get_log_dir_path(config.exp_dir, config.exp_id)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    print('exp_dir: {}'.format(exp_dir))
    log_file = os.path.join(exp_dir, 'log.txt')
    names = ['epoch', 'lr', 'Train Acc', 'Test Acc', 'Best Test Acc']
    with open(log_file, 'a') as f:
        f.write('batch size: {}\n'.format(config.batch_size))
        f.write('lr: {}\n'.format(config.lr))
        f.write('momentum: {}\n'.format(config.momentum))
        f.write('weight_decay: {}\n'.format(config.weight_decay))
        for per_name in names:
            f.write(per_name + '\t')
        f.write('\n')
    for epoch in range(start_epoch, config.epochs):
        lr = optimizer.param_groups[0]['lr']
        print('lr: {}'.format(lr))
        # train for one epoch
        train_acc = train_epoch(trainloader, model, criterion, optimizer,
                                lr_scheduler, epoch, config)
        # evaluate on test set
        test_acc = validate_epoch(testloader, model, criterion, config)
        # remember the best accuracy and save a checkpoint
        is_best = test_acc > best_test_acc
        if is_best:
            best_test_acc = test_acc

        utils.save_checkpoint(
            model, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'test_acc': test_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, exp_dir)

        values = [train_acc, test_acc, best_test_acc]
        with open(log_file, 'a') as f:
            f.write('{:d}\t'.format(epoch))
            f.write('{:g}\t'.format(lr))
            for per_value in values:
                f.write('{:2.2f}\t'.format(per_value))
            f.write('\n')
        print('exp_dir: {}'.format(exp_dir))
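
# A minimal usage sketch for the plain single-model variant above; the field
# values are illustrative assumptions only.
from types import SimpleNamespace

example_config = SimpleNamespace(
    dataset='cifar10', batch_size=128, lr=0.1, momentum=0.9,
    weight_decay=5e-4, epochs=200, resume=False, lr_scheduler='cosine',
    exp_dir='./experiments', exp_id='baseline')
# train_and_validate(example_config)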