import json
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# Encoder, Decoder, ImageCaptionDataset, data_transforms, Saver, Trainer and the
# train/validate/test routines are assumed to come from this repository's own modules.


def main(args):
    # run directory encodes the config name, teacher-forcing flag and entropy weights
    tf = '_tf' if args.tf else ''
    run_dir = os.path.join(
        'results/',
        args.config + tf + '_%.4f' % args.lambda_ient + '_%.4f' % args.lambda_tent)

    # vocabulary
    with open(args.data + '/word_dict.json', 'r') as f:
        word_dict = json.load(f)
    vocabulary_size = len(word_dict)

    # model: CNN encoder + attention decoder (only the decoder is optimized)
    encoder = Encoder(args.network, args.config)
    decoder = Decoder(vocabulary_size, encoder.dim, args.tf)

    optimizer = optim.Adam(decoder.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, args.step_size)

    encoder.cuda()
    decoder.cuda()

    cross_entropy_loss = nn.CrossEntropyLoss().cuda()

    # checkpointing and TensorBoard logging
    saver = Saver(Trainer(encoder, decoder, optimizer, scheduler), run_dir)
    writer = SummaryWriter(saver.log_dir)

    # data loaders for the train/val/test splits
    train_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    val_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='val'),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='test'),
        batch_size=args.batch_size, shuffle=False, num_workers=1)

    print('Starting training with {}'.format(args))
    for epoch in range(saver.start_epoch, args.epochs + 1):
        train(epoch, encoder, decoder, optimizer, cross_entropy_loss,
              train_loader, word_dict, args.alpha_c, args.log_interval,
              writer, saver, val_loader, args)
        saver.save_model(epoch)
        validate(epoch, encoder, decoder, cross_entropy_loss, val_loader,
                 word_dict, args.alpha_c, args.log_interval, writer, saver)
        test(epoch, encoder, decoder, cross_entropy_loss, test_loader,
             word_dict, args.alpha_c, args.log_interval, writer, saver)

        # step the LR scheduler once per epoch and report the change
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step()
        lr = optimizer.param_groups[0]['lr']
        print('learning rate %.7f -> %.7f' % (old_lr, lr))

    writer.close()
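# Hypothetical entry point, not part of the original script: a minimal sketch of how
# main() above could be wired up with argparse. The flag names and defaults are
# assumptions inferred from the args.* attributes used in main(); adjust them to the
# repository's real CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Image captioning training')
    parser.add_argument('--data', type=str, default='data', help='dataset root containing word_dict.json')
    parser.add_argument('--config', type=str, default='default', help='run configuration name')
    parser.add_argument('--network', type=str, default='resnet', help='encoder backbone (assumed default)')
    parser.add_argument('--tf', action='store_true', help='use teacher forcing')
    parser.add_argument('--lambda-ient', dest='lambda_ient', type=float, default=0.0)
    parser.add_argument('--lambda-tent', dest='lambda_tent', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--step-size', dest='step_size', type=int, default=5)
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=64)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--alpha-c', dest='alpha_c', type=float, default=1.0)
    parser.add_argument('--log-interval', dest='log_interval', type=int, default=100)
    main(parser.parse_args())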
import datetime
import time

import torch
from torch.nn import init

# Saver, backup_codes, network_paras and print_lr are project-local helpers assumed
# to be available in this script's namespace.


def train(args, model, train_set):
    # move model to GPU and switch to training mode
    model.cuda()
    model.train()

    # dataloader
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, drop_last=True,
        shuffle=True, num_workers=int(args.num_threads))

    # optimizer and LR scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=args.scheduler_step_size, gamma=args.scheduler_gamma)

    # saver
    saver = Saver(args)

    # loss function
    criterion = torch.nn.L1Loss()

    # time
    time_start_train = time.time()

    # misc
    num_batch = len(train_set) // args.batch_size
    counter = 0
    backup_codes(args)

    # report parameter count
    params = network_paras(model)
    log = 'num of parameters: {:,}'.format(params)
    saver.save_log(log)
    print(log)

    # init weights (skipped when fine-tuning from a checkpoint)
    def weights_init(m):
        if isinstance(m, torch.nn.Conv2d):
            init.kaiming_normal_(m.weight.data)

    if not args.is_finetuning:
        model.apply(weights_init)

    # start training
    print('{:=^40}'.format(' training start '))
    for epoch in range(args.epochs):
        running_loss = 0.0
        for bidx, (_, im_lr, im_hr) in enumerate(train_loader):
            # Variable/volatile is deprecated; plain CUDA tensors suffice
            im_lr = im_lr.cuda()
            im_hr = im_hr.cuda()

            # zero the parameter gradients
            model.zero_grad()

            # forward
            output = model(im_lr)

            # loss
            loss = criterion(output, im_hr)

            # backward & update
            loss.backward()
            optimizer.step()

            # accumulate running loss
            running_loss += loss.item()

            # print every N batches (running_loss is the sum since the last print)
            if counter % args.step_print_loss == 0:
                acc_time = time.time() - time_start_train
                log = 'epoch: (%d/%d) [%5d/%5d], loss: %.6f | time: %s' % \
                    (epoch, args.epochs, bidx, num_batch, running_loss,
                     str(datetime.timedelta(seconds=acc_time)))
                print(log)
                saver.save_log(log)
                running_loss = 0.0
                print_lr(optimizer)

            # periodic checkpoint
            if counter and counter % args.step_save == 0:
                saver.save_model(model)

            # counter increment
            counter += 1

        # step the LR scheduler once per epoch, after the optimizer updates
        scheduler.step()

    print('{:=^40}'.format(' Finish '))
    runtime = time.time() - time_start_train
    print('training time:', str(datetime.timedelta(seconds=runtime)) + '\n\n')
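# The helpers below are NOT from the original code. network_paras() and print_lr()
# are called in train() but not defined in this section; these are minimal sketches
# of what they are assumed to do (count trainable parameters, report the current LR).
def network_paras(model):
    # total number of trainable parameters in the model
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def print_lr(optimizer):
    # report the learning rate of each parameter group
    for i, group in enumerate(optimizer.param_groups):
        print('param group %d: lr = %.7f' % (i, group['lr']))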