def train(**kwargs):
    """Train the caption model and periodically save its weights.

    Keyword arguments are copied onto the module-level ``opt`` object
    before the data loader and model are built, so they act as per-call
    configuration overrides.

    Args:
        **kwargs: Option names and values applied to ``opt`` via ``setattr``.
    """
    import os  # local import: only needed to create the checkpoint directory

    device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
    for k, v in kwargs.items():
        setattr(opt, k, v)

    dataloader = get_dataloader(opt)
    model = CaptionModel(opt, dataloader.dataset.word2ix, dataloader.dataset.id2ix)
    if opt.model_path:
        # Deserialize on the CPU; the model is moved to ``device`` just below.
        model.load_state_dict(t.load(opt.model_path, map_location='cpu'))
    t.backends.cudnn.enabled = False
    model = model.to(device)

    optimizer = Adam(model.parameters(), opt.lr)
    criterion = t.nn.CrossEntropyLoss()

    for epoch in range(opt.max_epoch):
        # Iterating the loader directly (instead of an ``enumerate`` object)
        # lets tqdm read its length; the batch index and sample indexes
        # were never used.
        for imgs, (captions, lengths), _ in tqdm.tqdm(dataloader):
            # Tensors are used directly: the ``Variable`` wrapper is a no-op
            # in every PyTorch release that provides ``.to()`` / ``.item()``.
            imgs = imgs.to(device)
            captions = captions.to(device)

            pred, _ = model(imgs, captions, lengths)
            # ``[0]`` is the flat data tensor of the packed sequence.
            target_captions = pack_padded_sequence(captions, lengths)[0]
            loss = criterion(pred, target_captions)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("Current Loss: ", loss.item())

        if (epoch + 1) % opt.save_model == 0:
            # ``t.save`` does not create missing directories.
            os.makedirs("checkpoints", exist_ok=True)
            t.save(model.state_dict(), "checkpoints/{}.pth".format(epoch))
def main():
    """Parse the command line, build the caption model and train it.

    Creates the results directory, configures logging into it, builds the
    pretrained CNN, vocabulary, model and data iterators, then alternates a
    training pass and a validation pass per epoch, saving a checkpoint after
    every epoch.
    """
    global args
    import torch  # local import: used for grad-mode control and clipping

    args = parser.parse_args()
    # Compare by value; ``is ''`` tests object identity and is unreliable.
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    os.makedirs(save_path, exist_ok=True)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)
    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)
    # Per-epoch optimizer settings: the learning rate decays geometrically.
    regime = lambda e: {'lr': args.lr * (args.lr_decay ** e),
                        'momentum': args.momentum,
                        'weight_decay': args.weight_decay}
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        """Run one pass over ``data`` and return the average perplexity.

        Args:
            model: The caption model to run.
            data: Iterable yielding ``(imgs, (captions, lengths))`` batches.
            training: When true, back-propagate and step ``optimizer``;
                otherwise run in eval mode with gradients disabled.
            optimizer: Optimizer to step; required when ``training`` is true.

        Returns:
            The running average of ``exp(loss)`` over all batches.
        """
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                # ``async`` is a reserved word since Python 3.7; the
                # argument is spelled ``non_blocking`` in PyTorch >= 0.4.
                captions = captions.cuda(non_blocking=True)

            # Replaces ``Variable(..., volatile=not training)``: no autograd
            # graph is recorded during evaluation.
            with torch.set_grad_enabled(training):
                input_captions = captions[:-1]
                target_captions = pack_padded_sequence(captions, lengths)[0]
                pred, _ = model(imgs, input_captions, lengths)
                err = loss(pred, target_captions)

            # ``.item()`` replaces ``err.data[0]``, which fails on 0-dim
            # tensors.
            perplexity.update(math.exp(err.item()))

            if training:
                optimizer.zero_grad()
                err.backward()
                torch.nn.utils.clip_grad_norm_(
                    model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                # ``epoch`` is the loop variable of the enclosing ``main``.
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time,
                                 data_time=data_time, perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(
            optimizer, epoch, regime)
        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)
        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
# Build the caption model together with its cross-entropy and reward criteria.
logger.info('Building model...')
model = CaptionModel(opt)
xe_criterion = CrossEntropyCriterion()
rl_criterion = RewardCriterion()

# Move the model and both criteria to the GPU when CUDA is available.
if torch.cuda.is_available():
    for gpu_module in (model, xe_criterion, rl_criterion):
        gpu_module.cuda()

# Train with Adam and report the best validation result and wall-clock time.
logger.info('Start training...')
start = datetime.now()
optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate)
infos = train(
    model,
    xe_criterion,
    optimizer,
    train_loader,
    val_loader,
    opt,
    rl_criterion=rl_criterion,
)
logger.info(
    'Best val %s score: %f. Best iter: %d. Best epoch: %d',
    opt.eval_metric,
    infos['best_score'],
    infos['best_iter'],
    infos['best_epoch'],
)
elapsed = datetime.now() - start
logger.info('Training time: %s', elapsed)

if opt.result_file:
    logger.info('Start testing...')
shuffle=False, num_workers=workers) model = CaptionModel(cnn, vocab, embedding_size=embedding_size, rnn_size=rnn_size, num_layers=num_layers, share_embedding_weights=share_weights) if 'cuda' in type: cudnn.benchmark = True model.cuda() optimizer = select_optimizer(optimizer, params=model.parameters(), lr=learning_rate) regime = lambda e: { 'lr': learning_rate * (lr_decay**e), 'momentum': momentum, 'weight_decay': weight_decay } model.finetune_cnn(False) def forward(model, data, training=True, optimizer=None): use_cuda = 'cuda' in type loss = nn.CrossEntropyLoss() perplexity = AverageMeter() batch_time = AverageMeter() data_time = AverageMeter()
transform=mytransform, train=True) flicker8k_val = FlickrDataLoader.Flicker8k(img_dir, cap_path, val_txt, transform=mytransform, train=True) with open('feat6k.npy', 'r') as f: feat_tr = np.load(f) with open('capt6k.pkl', 'r') as f: caption_trn = pickle.load(f) with open('feat.pkl', 'r') as f: feat_val = pickle.load(f) with open('capt1k.pkl', 'r') as f: caption_val = pickle.load(f) model = CaptionModel(bsz=1, feat_dim=(196, 512), n_voc=5834, n_embed=512, n_hidden=1024).cuda() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) train(epoches=1) with open('model_t.pth', 'r') as f: model.load_state_dict(torch.load(f))
# Batches are assembled by the project-specific `my_collate` function.
train_loader = DataLoader(train_dataset, batch_size, collate_fn=my_collate)

# Build the caption model from the supplied embedding matrix, run the
# project's weight initialisation, and move the model to `device`.
caption_model = CaptionModel(model_info['vocab_size'], embedding_dim,
                             hidden_size=hidden_size,
                             embedding_matrix=embedding_matrix,
                             embedding_train=True)
init_weights(caption_model, embedding_pretrained=True)
caption_model.to(device)

# we will ignore the pad token in true target set
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = torch.optim.Adam(caption_model.parameters(), lr=0.01)
clip = 1  # presumably the gradient-clipping threshold used by train() -- TODO confirm
start = time()

# First phase: EPOCHS * 6 calls to train() at the initial learning rate (0.01).
print(f'Training...')
for i in tqdm(range(EPOCHS * 6)):
    loss = train(caption_model, train_loader, optimizer, criterion, clip, model_info['vocab_size'])
    print(f'loss = {loss}')

# reduce the learning rate
for param_group in optimizer.param_groups:
    param_group['lr'] = 1e-4

# Second phase at the reduced learning rate; the loop body lies outside this view.
for i in tqdm(range(EPOCHS * 6)):