def coco_evaluate(model, batch_size):
    """Generate captions for the full COCO 2017 validation set and print the
    standard COCO caption metrics (BLEU, METEOR, CIDEr, SPICE, ...).

    Args:
        model: model instance passed through to ``r.image_caption``.
        batch_size: batch size for the validation DataLoader.

    Side effects:
        Writes all predictions to ``results/caption_prediction.json`` and
        prints per-metric scores to stdout.
    """
    val_ann_file = os.path.join(caption_dir, 'captions_val2017.json')
    coco = COCO(val_ann_file)
    img_ids = coco.getImgIds()
    # COCO image file names are the zero-padded 12-digit image id.
    img_list = [os.path.join(coco_images, '%012d.jpg' % i) for i in img_ids]
    # Validation transformations (standard ImageNet mean/std normalization).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_tfms = transforms.Compose([
        transforms.Resize(int(224 * 1.14)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    val_dataset = ImageDataset(img_list, val_tfms)
    # shuffle=False keeps the prediction order aligned with img_ids, which the
    # counter-based image_id assignment below relies on.
    val_dl = DataLoader(val_dataset, num_workers=8, batch_size=batch_size,
                        drop_last=False, pin_memory=True, shuffle=False)
    counter = 0
    result_json = []
    for b in tqdm(val_dl):
        imgs = b.cuda(0)
        preds, _, _ = r.image_caption(model, imgs, mode='predict',
                                      return_str_preds=True)
        for p in preds:
            result_json.append({'image_id': img_ids[counter], 'caption': p})
            counter += 1
    # Ensure the output directory exists; on a fresh checkout open() would
    # otherwise raise FileNotFoundError.
    os.makedirs('results', exist_ok=True)
    with open('results/caption_prediction.json', 'w') as outfile:
        json.dump(result_json, outfile)
    # Evaluate the predictions against the ground-truth annotations.
    cocoRes = coco.loadRes('results/caption_prediction.json')
    cocoEval = COCOEvalCap(coco, cocoRes)
    # Restrict evaluation to the images we actually produced captions for.
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    # SPICE takes a few minutes the first time but speeds up due to caching.
    cocoEval.evaluate()
    print('\n\nCOCO Evaluation results')
    print('-' * 50)
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))
def train(shared_model, task, batch_size, train_steps, gpu_id, start, restore,
          counter, barrier=None, save_interval=None, eval_interval=None, log=True):
    """Training worker for one task ('caption', 'vqa', 'hmdb' or 'penn').

    Implements Hogwild-style multi-process training: GPU 0 trains directly on
    ``shared_model``; every other GPU keeps a local replica that is re-synced
    from the shared model at the start of each step, and pushes its gradients
    back via ``ensure_shared_grads``.

    Args:
        shared_model: the shared (CPU/GPU-0) OmniNet model all workers update.
        task: which task this worker trains ('caption'|'vqa'|'hmdb'|'penn').
        batch_size: training batch size for this task.
        train_steps: total number of steps; this worker runs [start, train_steps).
        gpu_id: CUDA device for this worker; 0 means "use shared model directly".
        start: step index to resume from.
        restore: step count passed to ScheduledOptim to restore the LR schedule.
        counter: shared step counter; ``counter.increment()`` yields the global step.
        barrier: optional multiprocessing barrier to keep workers in lock-step.
        save_interval: save the shared model every this many steps (None = never).
        eval_interval: run validation every this many steps (None = never).
        log: when True, write TensorBoard scalars and print progress.
    """
    log_dir = 'logs/%s' % task
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # summary_writer is only created when log is True; all uses below are
    # guarded by the same flag (validation blocks require log to run at all).
    if (log == True):
        summary_writer = SummaryWriter(log_dir)
    # Create local model. Seed is randomized per worker so replicas do not
    # generate identical dropout/sampling noise.
    torch.manual_seed(int(random.random() * 1000))
    if gpu_id > 0:
        model = omninet.OmniNet(gpu_id=gpu_id)
        model = model.cuda(gpu_id)
    else:
        # For GPU 0, use the shared model always
        model = shared_model
    # Per-task data loaders and optimizer. The optimizer always wraps the
    # SHARED model's parameters (gradients are copied into the shared model
    # before optimizer.step()).
    if task == 'caption':
        DL, val_dl = dl.coco_cap_batchgen(caption_dir=caption_dir,
                                          image_dir=coco_images,
                                          num_workers=8,
                                          batch_size=batch_size)
        optimizer = ScheduledOptim(
            Adam(filter(lambda x: x.requires_grad, shared_model.parameters()),
                 betas=(0.9, 0.98), eps=1e-09),
            512, 16000, restore, init_lr=0.02)
    elif task == 'vqa':
        DL, val_dl = dl.vqa_batchgen(vqa_dir, coco_images, num_workers=8,
                                     batch_size=batch_size)
        optimizer = ScheduledOptim(
            Adam(filter(lambda x: x.requires_grad, shared_model.parameters()),
                 betas=(0.9, 0.98), eps=1e-09),
            512, 16000, restore, max_lr=0.0001, init_lr=0.02)
    elif task == 'hmdb':
        DL, val_dl = dl.hmdb_batchgen(hmdb_data_dir, hmdb_process_dir,
                                      num_workers=8, batch_size=batch_size,
                                      test_batch_size=int(batch_size / 4),
                                      clip_len=16)
        optimizer = ScheduledOptim(
            Adam(filter(lambda x: x.requires_grad, shared_model.parameters()),
                 betas=(0.9, 0.98), eps=1e-09),
            512, 16000, restore, max_lr=0.0001, init_lr=0.02)
    elif task == 'penn':
        DL, val_dl, test_dl = dl.penn_dataloader(
            penn_data_dir, batch_size=batch_size,
            test_batch_size=int(batch_size / 2), num_workers=4,
            vocab_file='conf/penn_vocab.json')
        optimizer = ScheduledOptim(
            Adam(filter(lambda x: x.requires_grad, shared_model.parameters()),
                 betas=(0.9, 0.98), eps=1e-09),
            512, 16000, restore, init_lr=0.02)
    model = model.train()
    for i in range(start, train_steps):
        model.zero_grad()
        if barrier is not None:
            barrier.wait()
        # Re-sync the local replica with the shared weights each step.
        if gpu_id > 0:
            with torch.cuda.device(gpu_id):
                model.load_state_dict(shared_model.state_dict())
        # Calculate loss
        step = counter.increment()
        if task == 'caption':
            if (log and eval_interval is not None and i % eval_interval == 0):
                model = model.eval()
                val_loss = 0
                val_acc = 0
                print('-' * 100)
                print('Evaluation step')
                for b in tqdm(val_dl):
                    imgs = b['img']
                    if gpu_id >= 0:
                        imgs = imgs.cuda(device=gpu_id)
                    captions = b['cap']
                    # In val mode we do not pass the targets for prediction.
                    # We use it only for loss calculation
                    _, loss, acc = r.image_caption(model, imgs, targets=captions,
                                                   mode='val', return_str_preds=True)
                    val_loss += float(loss.detach().cpu().numpy())
                    val_acc += acc
                val_loss /= len(val_dl)
                val_acc = (val_acc / len(val_dl))
                summary_writer.add_scalar('Val_loss', val_loss, step)
                print('Step %d, COCO validation loss: %f, Accuracy %f %%' %
                      (step, val_loss, val_acc))
                print('-' * 100)
                model = model.train()
                # NOTE(review): unlike the vqa/hmdb branches there is no
                # `continue` here, so a training step also runs on evaluation
                # steps for the caption task — confirm this is intended.
            batch = next(DL)
            if gpu_id >= 0:
                imgs = batch['img'].cuda(device=gpu_id)
            else:
                imgs = batch['img']
            captions = batch['cap']
            _, loss, acc = r.image_caption(model, imgs, targets=captions)
            loss.backward()
            loss = loss.detach()
            if log:
                summary_writer.add_scalar('Loss', loss, step)
                print('Step %d, Caption Loss: %f, Accuracy: %f %%' %
                      (step, loss, acc))
        elif task == 'vqa':
            if (log and eval_interval is not None and i % eval_interval == 0):
                model = model.eval()
                val_loss = 0
                val_acc = 0
                print('-' * 100)
                print('Evaluation step')
                for b in tqdm(val_dl):
                    imgs = b['img']
                    answers = b['ans']
                    if gpu_id >= 0:
                        imgs = imgs.cuda(device=gpu_id)
                        answers = answers.cuda(device=gpu_id)
                    questions = b['ques']
                    # In val mode we do not pass the targets for prediction.
                    # We use it only for loss calculation
                    pred, loss, acc = r.vqa(model, imgs, questions,
                                            targets=answers, mode='val',
                                            return_str_preds=True)
                    val_loss += float(loss.detach().cpu().numpy())
                    val_acc += acc
                val_loss /= len(val_dl)
                val_acc = (val_acc / len(val_dl))
                summary_writer.add_scalar('Val_loss', val_loss, step)
                print('Step %d, VQA validation loss: %f, Accuracy %f %%' %
                      (step, val_loss, val_acc))
                print('-' * 100)
                model = model.train()
                # Skip the training step on evaluation steps for this task.
                continue
            batch = next(DL)
            if gpu_id >= 0:
                imgs = batch['img'].cuda(device=gpu_id)
                answers = batch['ans'].cuda(device=gpu_id)
            else:
                imgs = batch['img']
                answers = batch['ans']
            questions = batch['ques']
            _, loss, acc = r.vqa(model, imgs, questions, targets=answers)
            loss.backward()
            loss = loss.detach()
            if log:
                summary_writer.add_scalar('Loss', loss, step)
                print('Step %d, VQA Loss: %f, Accuracy: %f %%' %
                      (step, loss, acc))
        elif task == 'hmdb':
            if (log and eval_interval is not None and i % eval_interval == 0):
                model = model.eval()
                val_loss = 0
                val_acc = 0
                print('-' * 100)
                print('Evaluation step')
                for b in tqdm(val_dl):
                    vid, labels = b
                    if gpu_id >= 0:
                        vid = vid.cuda(device=gpu_id)
                        labels = labels.cuda(device=gpu_id)
                    _, loss, acc = r.hmdb(model, vid, targets=labels, mode='val')
                    val_loss += float(loss.detach().cpu().numpy())
                    val_acc += acc
                val_loss /= len(val_dl)
                val_acc = (val_acc / len(val_dl))
                summary_writer.add_scalar('Val_loss', val_loss, step)
                print('Step %d, HMDB validation loss: %f, Accuracy %f %%' %
                      (step, val_loss, val_acc))
                print('-' * 100)
                model = model.train()
                # Skip the training step on evaluation steps for this task.
                continue
            vid, labels = next(DL)
            if gpu_id >= 0:
                vid = vid.cuda(device=gpu_id)
                labels = labels.cuda(device=gpu_id)
            _, loss, acc = r.hmdb(model, vid, targets=labels,
                                  return_str_preds=True)
            loss.backward()
            loss = loss.detach()
            if log:
                summary_writer.add_scalar('Loss', loss, step)
                print('Step %d, HMDB Loss: %f, Accuracy: %f %%' %
                      (step, loss, acc))
        elif task == 'penn':
            if (log and eval_interval is not None and i % eval_interval == 0):
                model = model.eval()
                val_loss = 0
                val_acc = 0
                print('-' * 100)
                print('Evaluation step')
                # NOTE(review): this loop iterates test_dl but the averages
                # below divide by len(val_dl); if the two loaders differ in
                # length the reported loss/accuracy are mis-scaled — confirm.
                for b in tqdm(test_dl):
                    en = b['text']
                    targets = b['tokens']
                    pad_id = b['pad_id']
                    pad_mask = b['pad_mask']
                    if gpu_id >= 0:
                        targets = targets.to(gpu_id)
                        pad_mask = pad_mask.to(gpu_id)
                    _, loss, acc = r.penn(model, en, target_pad_mask=pad_mask,
                                          pad_id=pad_id, targets=targets,
                                          mode='val', return_str_preds=True)
                    loss = loss.detach()
                    val_loss += float(loss.cpu().numpy())
                    val_acc += acc
                val_loss /= len(val_dl)
                val_acc = (val_acc / len(val_dl))
                summary_writer.add_scalar('Val_loss', val_loss, step)
                print('Step %d, PENN validation loss: %f, Accuracy %f %%' %
                      (step, val_loss, val_acc))
                print('-' * 100)
                model = model.train()
                # NOTE(review): no `continue` here (unlike vqa/hmdb), so a
                # training step also runs on evaluation steps — confirm intended.
            batch = next(DL)
            en = batch['text']
            targets = batch['tokens']
            pad_id = batch['pad_id']
            pad_mask = batch['pad_mask']
            if gpu_id >= 0:
                targets = targets.to(gpu_id)
                pad_mask = pad_mask.to(gpu_id)
            _, loss, acc = r.penn(model, en, pad_id=pad_id, targets=targets,
                                  target_pad_mask=pad_mask)
            loss.backward()
            loss = loss.detach()
            if log:
                summary_writer.add_scalar('Loss', loss, step)
                print('Step %d, PENN Loss: %f, Accuracy: %f %%' %
                      (step, loss, acc))
        # End Calculate loss
        # Push local gradients into the shared model before stepping the
        # optimizer (which wraps the shared model's parameters).
        if gpu_id > 0:
            ensure_shared_grads(model, shared_model, gpu_id)
        optimizer.step()
        # Save model
        if (save_interval != None and (i + 1) % save_interval == 0):
            shared_model.save(model_save_path, step)
        sys.stdout.flush()