import argparse

import torch
from torch import nn, optim
from torchvision import models

import fu  # project helper module providing Network, create_directory, train, save_checkpoint
import load_transform


def main():
    # 'ResNet' (the base class) is not directly instantiable with pretrained=True,
    # so only the concrete variants are allowed.
    allowed_models = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']

    parser = argparse.ArgumentParser(description='Train NN')
    parser.add_argument('data_dir', help='directory containing sub-folders with data')
    parser.add_argument('--save_dir', help='directory for saving checkpoint', default='checkpoints')
    parser.add_argument('--arch', help='pre-trained model architecture', default='resnet18', choices=allowed_models)
    parser.add_argument('--learning_rate', help='learning rate during learning', type=float, default=0.01)
    parser.add_argument('--dropout', help='dropout during learning', type=float, default=0.05)
    parser.add_argument('--hidden_units', help='list of numbers of nodes in hidden layers', nargs='+', type=int, default=[256, 128])
    parser.add_argument('--epochs', help='number of epochs for training', default=3, type=int)
    parser.add_argument('--gpu', help='enable GPU', action='store_true')
    args = parser.parse_args()

    # Describe directories relative to the working directory
    data_dir = args.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'
    save_dir = args.save_dir
    model_arch = args.arch
    model_hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    drop = args.dropout

    print('Data directory: ' + data_dir)
    print('Hidden units: ' + str(model_hidden_units))
    print('Save directory: ' + save_dir)
    print('Architecture: ' + model_arch)

    fu.create_directory(save_dir)

    # Load the pre-trained backbone and freeze its parameters
    model = getattr(models, model_arch)(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final fully-connected layer with a custom classifier head
    model.fc = fu.Network(model.fc.in_features, 102, model_hidden_units, drop)

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)

    device = torch.device('cuda' if torch.cuda.is_available() and args.gpu else 'cpu')
    print('device: ', device)

    epochs = args.epochs
    print_every = 50

    train_loader, test_loader, valid_loader, train_data, test_data, valid_data = \
        load_transform.load_transform(data_dir, train_dir, valid_dir, test_dir)

    fu.train(device, model, epochs, criterion, optimizer, print_every,
             train_loader, test_loader, valid_loader)
    fu.save_checkpoint(model, model_arch, epochs, criterion, optimizer, train_data, save_dir)

    return model, test_loader, criterion
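# The script above relies on `fu.Network` as a drop-in classifier head whose
# definition is not shown here. The following is a minimal sketch of what such
# a head typically looks like, assuming the constructor signature used above
# (in_features, out_features, hidden_units, dropout); the LogSoftmax output
# matches the nn.NLLLoss criterion. This is an illustration, not the repo's
# actual code.

import torch
from torch import nn


class Network(nn.Module):
    def __init__(self, in_features, out_features, hidden_units, drop):
        super().__init__()
        # Chain linear layers through the requested hidden sizes
        sizes = [in_features] + list(hidden_units)
        layers = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            layers += [nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(drop)]
        layers += [nn.Linear(sizes[-1], out_features), nn.LogSoftmax(dim=1)]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


# Quick shape check: a resnet18 fc head maps 512 features to 102 log-probabilities.
if __name__ == '__main__':
    head = Network(512, 102, [256, 128], 0.05)
    print(head(torch.randn(4, 512)).shape)  # torch.Size([4, 102])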
from collections import OrderedDict

# Custom classifier head for VGG16 (its feature extractor outputs 25088 values);
# the fc1 block was cut off in this snippet and is reconstructed here from the
# fc2 input size (4096).
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(25088, 4096)),
    ('relu1', nn.ReLU(inplace=True)),
    ('dropout1', nn.Dropout()),
    ('fc2', nn.Linear(4096, 1000)),
    ('relu2', nn.ReLU(inplace=True)),
    ('dropout2', nn.Dropout()),
    ('fc3', nn.Linear(1000, 102)),
    ('output', nn.LogSoftmax(dim=1))
]))
model.classifier = classifier

# Set parameters for training the model
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=learn_rate)

# Train the model on top of the pre-trained network and compute the accuracy
model.train()
f.train(epochs, trainloader, valloader, model, criterion, optimizer)

# Validate the model
model.eval()
print("\nValidating the trained model on a different dataset. The accuracy result is below:")
f.validation(testloader, model, criterion)

# Save the model
checkpoint = {'filepath': data_dir,
              'model': models.vgg16(pretrained=True),
              'classifier': classifier,
              'optimizer': optimizer.state_dict(),
              'state_dict': model.state_dict(),
              'class_to_idx': train_data.class_to_idx,
              'criterion': criterion,
              'epochs': epochs}
torch.save(checkpoint, 'checkpoint.pth')  # file name assumed; not in the original snippet
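# For completeness, a minimal sketch of restoring this checkpoint for inference.
# It assumes the file name used above and the checkpoint keys defined there; the
# repo's actual loading code is not part of this snippet.

import torch


def load_checkpoint(path='checkpoint.pth'):
    checkpoint = torch.load(path, map_location='cpu')
    model = checkpoint['model']                      # the saved vgg16 backbone
    model.classifier = checkpoint['classifier']      # custom head defined above
    model.load_state_dict(checkpoint['state_dict'])  # trained weights
    model.class_to_idx = checkpoint['class_to_idx']  # label mapping for inference
    model.eval()
    return model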
def main():
    args = parse_args()

    # cuDNN settings
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    gpus = [int(i) for i in config.GPUS.split(',')]

    logger, final_output_dir, tb_log_dir = create_logger(config, args.cfg, 'train')
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Initialize generators and discriminators (direct calls; eval() on a
    # string literal was unnecessary)
    G_AB = models.cyclegan.get_generator(config.DATA.IMAGE_SHAPE, config.NETWORK.NUM_RES_BLOCKS)
    G_BA = models.cyclegan.get_generator(config.DATA.IMAGE_SHAPE, config.NETWORK.NUM_RES_BLOCKS)
    D_A = models.cyclegan.get_discriminator(config.DATA.IMAGE_SHAPE)
    D_B = models.cyclegan.get_discriminator(config.DATA.IMAGE_SHAPE)
    # logger.info(pprint.pformat(G_AB))
    # logger.info(pprint.pformat(D_A))

    # Multi-GPU
    model_dict = {}
    model_dict['G_AB'] = torch.nn.DataParallel(G_AB, device_ids=gpus).cuda()
    model_dict['G_BA'] = torch.nn.DataParallel(G_BA, device_ids=gpus).cuda()
    model_dict['D_A'] = torch.nn.DataParallel(D_A, device_ids=gpus).cuda()
    model_dict['D_B'] = torch.nn.DataParallel(D_B, device_ids=gpus).cuda()

    # Loss functions
    criterion_dict = {}
    criterion_dict['GAN'] = torch.nn.MSELoss().cuda()
    criterion_dict['cycle'] = torch.nn.L1Loss().cuda()
    criterion_dict['identity'] = torch.nn.L1Loss().cuda()

    # Optimizers
    optimizer_dict = {}
    optimizer_dict['G'] = get_optimizer(
        config, itertools.chain(G_AB.parameters(), G_BA.parameters()))
    optimizer_dict['D_A'] = get_optimizer(config, D_A.parameters())
    optimizer_dict['D_B'] = get_optimizer(config, D_B.parameters())

    start_epoch = config.TRAIN.START_EPOCH
    if config.TRAIN.RESUME:
        start_epoch, model_dict, optimizer_dict = load_checkpoint(
            model_dict, optimizer_dict, final_output_dir)

    # Learning rate schedulers
    lr_scheduler_dict = {}
    lr_scheduler_dict['G'] = get_lr_scheduler(config, optimizer_dict['G'])
    lr_scheduler_dict['D_A'] = get_lr_scheduler(config, optimizer_dict['D_A'])
    lr_scheduler_dict['D_B'] = get_lr_scheduler(config, optimizer_dict['D_B'])

    # Fast-forward the schedulers when resuming from a checkpoint
    for _ in range(start_epoch):
        for lr_scheduler in lr_scheduler_dict.values():
            lr_scheduler.step()

    # Buffers of previously generated samples
    fake_A_buffer = ReplayBuffer()
    fake_B_buffer = ReplayBuffer()

    # Image transformations
    transforms_ = [
        # transforms.Resize(int(config.img_height * 1.12), Image.BICUBIC),
        # transforms.RandomCrop((config.img_height, config.img_width)),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]

    # Datasets
    logger.info('=> loading train and testing dataset...')
    train_dataset = ImageDataset(config.DATA.TRAIN_DATASET_B, config.DATA.TRAIN_DATASET,
                                 transforms_=transforms_)
    test_dataset = ImageDataset(config.DATA.TEST_DATASET_B, config.DATA.TEST_DATASET,
                                transforms_=transforms_, mode='test')

    # Training data loader
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
                                  shuffle=config.TRAIN.SHUFFLE,
                                  num_workers=config.NUM_WORKERS)
    # Test data loader
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=config.TEST.BATCH_SIZE * len(gpus),
                                 shuffle=False,
                                 num_workers=config.NUM_WORKERS)

    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        train(config, epoch, model_dict, fake_A_buffer, fake_B_buffer,
              train_dataloader, criterion_dict, optimizer_dict,
              lr_scheduler_dict, writer_dict)
        test(config, model_dict, test_dataloader, criterion_dict, final_output_dir)

        for lr_scheduler in lr_scheduler_dict.values():
            lr_scheduler.step()

        if config.TRAIN.CHECKPOINT_INTERVAL != -1 and epoch % config.TRAIN.CHECKPOINT_INTERVAL == 0:
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': 'cyclegan',
                    'state_dict_G_AB': model_dict['G_AB'].module.state_dict(),
                    'state_dict_G_BA': model_dict['G_BA'].module.state_dict(),
                    'state_dict_D_A': model_dict['D_A'].module.state_dict(),
                    'state_dict_D_B': model_dict['D_B'].module.state_dict(),
                    'optimizer_G': optimizer_dict['G'].state_dict(),
                    'optimizer_D_A': optimizer_dict['D_A'].state_dict(),
                    'optimizer_D_B': optimizer_dict['D_B'].state_dict(),
                },
                final_output_dir)

    writer_dict['writer'].close()
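# `ReplayBuffer` above is the standard CycleGAN trick of training the
# discriminators on a history of generated images rather than only the latest
# batch. Its definition is not part of this snippet; the sketch below shows the
# usual implementation (fixed-size pool, each incoming fake image is stored,
# and with probability 0.5 an older one is returned in its place). Treat it as
# an illustration of the technique, not the repo's exact code.

import random
import torch


class ReplayBuffer:
    def __init__(self, max_size=50):
        self.max_size = max_size
        self.data = []

    def push_and_pop(self, batch):
        out = []
        for element in batch.detach():
            element = element.unsqueeze(0)
            if len(self.data) < self.max_size:
                # Pool not full yet: store and return the new sample
                self.data.append(element)
                out.append(element)
            elif random.random() > 0.5:
                # Swap a stored old sample for the new one
                idx = random.randint(0, self.max_size - 1)
                out.append(self.data[idx].clone())
                self.data[idx] = element
            else:
                out.append(element)
        return torch.cat(out)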
# Record the training process
epoch_train_losses = []
epoch_train_scores = []
epoch_test_losses = []
epoch_test_scores = []
lamdaValue = np.ones(opt.epochs)

savePath, date_method, save_model_path = save_path(opt)
writer = SummaryWriter(os.path.join(savePath, date_method, 'logfile'))

# Start training
for epoch in range(1, opt.epochs + 1):
    # Train and validate the model
    train_losses, train_Target_scores = train(
        [targetCNN, sourceImagenet, sourcePlaces], device, train_loader,
        optimizer, epoch, lamdaValue[epoch - 1], opt)
    test_total_loss, test_total_score = validation(
        [targetCNN, sourceImagenet, sourcePlaces], device, optimizer,
        valid_loader, lamdaValue[epoch - 1], opt)
    scheduler.step()

    # Save results
    epoch_train_losses.append(np.mean(train_losses))
    epoch_train_scores.append(np.mean(train_Target_scores))
    epoch_test_losses.append(np.mean(test_total_loss))
    epoch_test_scores.append(np.mean(test_total_score))

    # Plot the average loss of each epoch
    writer.add_scalar('Loss/train', np.mean(train_losses), epoch)
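# Both this loop and the next one call a `save_path(opt)` helper that is not
# shown. A plausible minimal sketch is given below, assuming it derives a
# timestamped run directory from the options and returns (savePath,
# date_method, save_model_path); the option names used here are assumptions,
# not the repo's actual code.

import os
import time


def save_path(opt):
    save_path_root = getattr(opt, 'save_dir', './results')  # assumed option name
    date_method = time.strftime('%Y%m%d_%H%M%S') + '_' + getattr(opt, 'method', 'run')
    save_model_path = os.path.join(save_path_root, date_method, 'models')
    os.makedirs(save_model_path, exist_ok=True)
    return save_path_root, date_method, save_model_path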
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gammaValue)

# Record the training process
savePath, date_method, save_model_path = save_path(opt)
train_logger = Logger(os.path.join(savePath, date_method, 'train.log'),
                      ['epoch', 'loss', 'acc', 'lr'])
val_logger = Logger(os.path.join(savePath, date_method, 'val.log'),
                    ['epoch', 'loss', 'acc', 'best_acc', 'lr'])
writer = SummaryWriter(os.path.join(savePath, date_method, 'logfile'))

# Start training
best_acc = 0
for epoch in range(1, opt.epochs + 1):
    # Train and validate the model (the original mixed `StudentModel` and
    # `studentModel`; the name is normalized here)
    train_losses, train_scores = train([studentModel, smartModel, perturbation_model],
                                       device, train_loader, optimizer, epoch, opt)
    test_losses, test_scores = validation([studentModel, smartModel, perturbation_model],
                                          device, optimizer, valid_loader, opt)
    scheduler.step()

    # Log the average loss and accuracy of each epoch
    train_logger.log({
        'epoch': epoch,
        'loss': train_losses.avg,
        'acc': train_scores.avg,
        'lr': optimizer.param_groups[0]['lr']
    })

    # Keep the best-performing checkpoint
    if best_acc < test_scores.avg:
        best_acc = test_scores.avg
        torch.save({'state_dict': studentModel.state_dict()},
                   os.path.join(save_model_path, 'student_best.pth'))

    # The original snippet was cut off here; the fields follow the val.log
    # columns declared above
    val_logger.log({
        'epoch': epoch,
        'loss': test_losses.avg,
        'acc': test_scores.avg,
        'best_acc': best_acc,
        'lr': optimizer.param_groups[0]['lr']
    })
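# `Logger` above takes a file path and a list of column names and exposes a
# `log(dict)` method. Its source is not included; this is a minimal sketch of
# a common tab-separated implementation behind that interface, written here as
# an assumption rather than the repo's actual class.

import csv


class Logger:
    def __init__(self, path, header):
        self.log_file = open(path, 'w', newline='')
        self.writer = csv.writer(self.log_file, delimiter='\t')
        self.header = header
        self.writer.writerow(header)

    def log(self, values):
        # Write one row, keeping the column order of the header
        self.writer.writerow([values[col] for col in self.header])
        self.log_file.flush()

    def __del__(self):
        self.log_file.close()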
import test
import function
import embedding

if __name__ == '__main__':
    order = input('choose embedding/train/test\n')
    if order == 'embedding':
        embedding.build_embed()
    elif order == 'train':
        function.train()
    elif order == 'test':
        function.test()