import os

import torch
import torch.nn as nn
import torch.optim as optim

# Repo-local helpers (get_dataloaders, get_model, num_class, utils, Augment,
# train_epoch, train_epoch_two_bns, validate_epoch) are assumed to be imported
# at module level elsewhere in the project.


# Training entry point for the augmented model: a target network with two
# batch-norm branches ('base'/'deform') plus a deformation VAE acting as the
# learned augmentation network.
def train_and_validate(config):
    # data loaders
    trainloader, testloader = get_dataloaders(config)

    # model: only the two-BN configuration is supported here
    if config.bn_num == 2:
        target_net = get_model(config, num_class(config.dataset),
                               bn_types=['base', 'deform'])
    else:
        raise Exception('invalid bn_num: {}'.format(config.bn_num))

    # deformation VAE (augmentation network)
    if config.deform_vae == 'deform_conv_cifar_v1':
        from models.deform_vae_cifar import VAE
        aug_net = VAE(config.z_dim_deform, config.fea_dim_deform)
        aug_net = nn.DataParallel(aug_net).cuda()
    else:
        raise Exception('invalid deform_vae: {}'.format(config.deform_vae))

    model = Augment(target_net=target_net, aug_net=aug_net, config=config)

    start_epoch = 0
    best_test_acc = 0.0
    test_acc = 0.0
    if config.resume:
        best_test_acc, test_acc, start_epoch = \
            utils.load_checkpoint(config, model.target_net, model.target_net_optim)

    print('trainloader length: {}'.format(len(trainloader)))
    print('testloader length: {}'.format(len(testloader)))

    exp_dir = utils.get_log_dir_path(config.exp_dir, config.exp_id)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    print('exp_dir: {}'.format(exp_dir))

    # write hyperparameters and a column header to the log file
    log_file = os.path.join(exp_dir, 'log.txt')
    names = ['epoch', 'lr', 'Train Acc', 'Test Acc', 'Best Test Acc']
    with open(log_file, 'a') as f:
        f.write('batch size: {}\n'.format(config.batch_size))
        f.write('lr: {}\n'.format(config.lr))
        f.write('momentum: {}\n'.format(config.momentum))
        f.write('weight_decay: {}\n'.format(config.weight_decay))
        for per_name in names:
            f.write(per_name + '\t')
        f.write('\n')

    print('target net grad clip: {}'.format(config.grad_clip))
    for epoch in range(start_epoch, config.epochs):
        lr = model.target_net_optim.param_groups[0]['lr']
        print('lr: {}'.format(lr))

        # train for one epoch with the two-BN routine
        train_acc = train_epoch_two_bns(trainloader, model, epoch, config)

        # evaluate on the test set
        test_acc = validate_epoch(testloader, model, config)

        # remember best accuracy and save a checkpoint
        is_best = test_acc > best_test_acc
        if is_best:
            best_test_acc = test_acc
        utils.save_checkpoint(
            model, {
                'epoch': epoch + 1,
                'state_dict': model.target_net.state_dict(),
                'test_acc': test_acc,
                'optimizer': model.target_net_optim.state_dict(),
            }, is_best, exp_dir)

        # append one row of metrics to the log file
        values = [train_acc, test_acc, best_test_acc]
        with open(log_file, 'a') as f:
            f.write('{:d}\t'.format(epoch))
            f.write('{:g}\t'.format(lr))
            for per_value in values:
                f.write('{:2.2f}\t'.format(per_value))
            f.write('\n')
    print('exp_dir: {}'.format(exp_dir))
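
# For reference, a minimal sketch of the config the two-BN variant above
# reads. The field names are taken directly from the function body; the
# concrete values, and building the config as a SimpleNamespace instead of
# through the project's own argparse setup, are illustrative assumptions only.
from types import SimpleNamespace

example_two_bn_config = SimpleNamespace(
    dataset='cifar10',                  # consumed by num_class()/get_dataloaders()
    bn_num=2,                           # the function only accepts 2
    deform_vae='deform_conv_cifar_v1',  # selects models.deform_vae_cifar.VAE
    z_dim_deform=32,                    # VAE latent dim (hypothetical value)
    fea_dim_deform=64,                  # VAE feature dim (hypothetical value)
    resume=False,
    exp_dir='./experiments',
    exp_id='two_bn_run',
    batch_size=128,
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
    grad_clip=5.0,
    epochs=200,
)
# train_and_validate(example_two_bn_config)  # would launch a full training run
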
# Baseline training entry point: a single network trained with standard
# cross-entropy, SGD with Nesterov momentum, and a cosine LR schedule.
def train_and_validate(config):
    # data loaders
    trainloader, testloader = get_dataloaders(config)

    # model
    model = get_model(config, num_class(config.dataset))

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # optimizer
    optimizer = optim.SGD(model.parameters(), config.lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay,
                          nesterov=True)

    # lr scheduler
    if config.lr_scheduler == 'cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=float(config.epochs), eta_min=0.)
    else:
        raise ValueError('invalid lr_scheduler: {}'.format(config.lr_scheduler))

    start_epoch = 0
    best_test_acc = 0.0
    test_acc = 0.0
    if config.resume:
        best_test_acc, test_acc, start_epoch = \
            utils.load_checkpoint(config, model, optimizer)

    print('trainloader length: {}'.format(len(trainloader)))
    print('testloader length: {}'.format(len(testloader)))

    exp_dir = utils.get_log_dir_path(config.exp_dir, config.exp_id)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    print('exp_dir: {}'.format(exp_dir))

    # write hyperparameters and a column header to the log file
    log_file = os.path.join(exp_dir, 'log.txt')
    names = ['epoch', 'lr', 'Train Acc', 'Test Acc', 'Best Test Acc']
    with open(log_file, 'a') as f:
        f.write('batch size: {}\n'.format(config.batch_size))
        f.write('lr: {}\n'.format(config.lr))
        f.write('momentum: {}\n'.format(config.momentum))
        f.write('weight_decay: {}\n'.format(config.weight_decay))
        for per_name in names:
            f.write(per_name + '\t')
        f.write('\n')

    for epoch in range(start_epoch, config.epochs):
        lr = optimizer.param_groups[0]['lr']
        print('lr: {}'.format(lr))

        # train for one epoch
        train_acc = train_epoch(trainloader, model, criterion, optimizer,
                                lr_scheduler, epoch, config)

        # evaluate on the test set
        test_acc = validate_epoch(testloader, model, criterion, config)

        # remember best accuracy and save a checkpoint
        is_best = test_acc > best_test_acc
        if is_best:
            best_test_acc = test_acc
        utils.save_checkpoint(
            model, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'test_acc': test_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, exp_dir)

        # append one row of metrics to the log file
        values = [train_acc, test_acc, best_test_acc]
        with open(log_file, 'a') as f:
            f.write('{:d}\t'.format(epoch))
            f.write('{:g}\t'.format(lr))
            for per_value in values:
                f.write('{:2.2f}\t'.format(per_value))
            f.write('\n')
    print('exp_dir: {}'.format(exp_dir))
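
# Likewise, a minimal sketch of the config the baseline variant reads; it
# additionally needs config.lr_scheduler but none of the deform fields.
# The values are illustrative assumptions, not the project's defaults.
example_baseline_config = SimpleNamespace(
    dataset='cifar10',
    lr_scheduler='cosine',  # the only schedule the function accepts
    resume=False,
    exp_dir='./experiments',
    exp_id='baseline_run',
    batch_size=128,
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
    epochs=200,
)
# train_and_validate(example_baseline_config)  # would launch a full training run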