def eval(opt):
    """Evaluate the module-level captioning `model` on the validation split.

    Relies on module-level state: `model`, `dataloader_val`, `dataset`,
    the pre-allocated input Variables (`input_imgs`, `input_seqs`,
    `gt_seqs`, `input_num`, `input_ppls`, `gt_bboxs`, `mask_bboxs`),
    `utils`, and the tensorboard globals (`tf`, `tf_summary_writer`,
    `iteration`, `val_result_history`).

    Args:
        opt: run options; reads beam_size, cbs_tag_size, vocab_size,
            language_eval, decode_noc, dataset, val_split, inference_only.

    Returns:
        lang_stats: dict of language-evaluation metrics, or None when
        ``opt.language_eval != 1``.
    """
    # NOTE(review): the name shadows the builtin `eval`; kept unchanged
    # so existing callers keep working.
    model.eval()

    #########################################################################
    # eval begins here
    #########################################################################
    data_iter_val = iter(dataloader_val)
    num_show = 0
    predictions = []
    count = 0

    for step in range(len(dataloader_val)):
        # FIX: `iterator.next()` is Python-2 only; use the builtin next().
        data = next(data_iter_val)
        img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id = data

        # Trim proposal padding to the largest per-image proposal count in
        # this batch (floored at 1 so the slice is never empty).
        proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]

        # Copy the batch into the pre-allocated (possibly CUDA) buffers.
        input_imgs.data.resize_(img.size()).copy_(img)
        input_seqs.data.resize_(iseq.size()).copy_(iseq)
        gt_seqs.data.resize_(gts_seq.size()).copy_(gts_seq)
        input_num.data.resize_(num.size()).copy_(num)
        input_ppls.data.resize_(proposals.size()).copy_(proposals)
        gt_bboxs.data.resize_(bboxs.size()).copy_(bboxs)
        mask_bboxs.data.resize_(box_mask.size()).copy_(box_mask)
        # FIX: removed a second, redundant copy of `img` into `input_imgs`.

        eval_opt = {
            'sample_max': 1,
            'beam_size': opt.beam_size,
            'inference_mode': True,
            'tag_size': opt.cbs_tag_size
        }
        seq, bn_seq, fg_seq = model(input_imgs, input_seqs, gt_seqs,
                                    input_num, input_ppls, gt_bboxs,
                                    mask_bboxs, 'sample', eval_opt)

        sents = utils.decode_sequence(dataset.itow, dataset.itod,
                                      dataset.ltow, dataset.itoc,
                                      dataset.wtod, seq.data, bn_seq.data,
                                      fg_seq.data, opt.vocab_size, opt)

        for k, sent in enumerate(sents):
            entry = {'image_id': img_id[k], 'caption': sent}
            predictions.append(entry)
            # Echo only the first 20 captions to keep the log readable.
            if num_show < 20:
                print('image %s: %s' % (entry['image_id'], entry['caption']))
                num_show += 1

        # Coarse progress indicator: print the batch counter every 100 steps.
        if count % 100 == 0:
            print(count)
        count += 1

    print('Total image to be evaluated %d' % (len(predictions)))

    lang_stats = None
    if opt.language_eval == 1:
        if opt.decode_noc:
            lang_stats = utils.noc_eval(predictions, str(1), opt.val_split, opt)
        else:
            lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                             opt.val_split, opt)

    print('Saving the predictions')
    if opt.inference_only:
        import json
        pdb.set_trace()  # deliberate breakpoint for interactive inspection

    # Write validation result into summary.
    # FIX: also require lang_stats — it is None when language_eval != 1,
    # which would raise AttributeError on `.items()`.
    if tf is not None and lang_stats is not None:
        for k, v in lang_stats.items():
            add_summary_value(tf_summary_writer, k, v, iteration)
        tf_summary_writer.flush()

    val_result_history[iteration] = {
        'lang_stats': lang_stats,
        'predictions': predictions
    }

    return lang_stats
def eval_fusion_models(opt, dataset_val, imp_pro, spa_pro, sem_pro,
                       imp_model=None, spa_model=None, sem_model=None):
    """Evaluate an ensemble (implicit/spatial/semantic relation models) by
    fused beam search over the validation split.

    Relies on module-level helpers: `tqdm`, `Variable`, `utils`,
    `prepare_graph_variables`, `fusion_beam_sample`, `save_attention`.

    Args:
        opt: run options; reads batch_size, num_workers, cuda, beam_size,
            cbs_tag_size, vocab_size, relation_type, nongt_dim,
            imp_pos_emb_dim, spa_label_num, sem_label_num, imp_model,
            spa_model, sem_model, graph_attention, att_weight_save,
            language_eval, decode_noc, dataset, val_split.
        dataset_val: validation dataset (also supplies the decode vocabs).
        imp_pro, spa_pro, sem_pro: per-model fusion weights/probabilities.
        imp_model, spa_model, sem_model: the three relation models
            (any may be None, handled downstream by `fusion_beam_sample`).

    Returns:
        (lang_stats, predictions): metrics dict (or None when
        ``opt.language_eval != 1``) and the list of caption entries.
    """
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    # Pre-allocate reusable input buffers; resized/copied per batch below.
    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
    mask_bboxs = torch.ByteTensor(1)
    gt_seqs = torch.LongTensor(1)
    input_num = torch.LongTensor(1)

    if opt.cuda:
        input_imgs = input_imgs.cuda()
        input_seqs = input_seqs.cuda()
        gt_seqs = gt_seqs.cuda()
        input_num = input_num.cuda()
        input_ppls = input_ppls.cuda()
        gt_bboxs = gt_bboxs.cuda()
        mask_bboxs = mask_bboxs.cuda()

    input_imgs = Variable(input_imgs)
    input_seqs = Variable(input_seqs)
    gt_seqs = Variable(gt_seqs)
    input_num = Variable(input_num)
    input_ppls = Variable(input_ppls)
    gt_bboxs = Variable(gt_bboxs)
    mask_bboxs = Variable(mask_bboxs)

    # FIX: removed an unused `iter(dataloader_val)` (it spawned a second,
    # throwaway DataLoader iterator) and the unused `loss_temp`/`start`.
    num_show = 0
    predictions = []

    progress_bar = tqdm(dataloader_val, desc='|Validation process', leave=False)
    for step, data in enumerate(progress_bar):
        (img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id,
         spa_adj_matrix, sem_adj_matrix) = data

        # FIX: hoist the loop-invariant max proposal count (was recomputed
        # five times per batch). Floored at 1 so slices are never empty.
        num_prop = max(int(max(num[:, 1])), 1)
        proposals = proposals[:, :num_prop, :]

        # Copy the batch into the pre-allocated buffers.
        # (FF: .resize_ on the tensor itself — .data.resize_ broke on
        # newer PyTorch.)
        input_imgs.resize_(img.size()).copy_(img)
        input_seqs.resize_(iseq.size()).copy_(iseq)
        gt_seqs.resize_(gts_seq.size()).copy_(gts_seq)
        input_num.resize_(num.size()).copy_(num)
        input_ppls.resize_(proposals.size()).copy_(proposals)
        gt_bboxs.resize_(bboxs.size()).copy_(bboxs)
        # FF: bool mask instead of 0/1 bytes, as required by newer PyTorch.
        mask_bboxs.resize_(box_mask.size()).copy_(box_mask.bool())
        # FIX: removed a second, redundant copy of `img` into `input_imgs`.

        # Trim adjacency matrices (when present) to the kept proposals.
        if len(spa_adj_matrix[0]) != 0:
            spa_adj_matrix = spa_adj_matrix[:, :num_prop, :num_prop]
        if len(sem_adj_matrix[0]) != 0:
            sem_adj_matrix = sem_adj_matrix[:, :num_prop, :num_prop]

        # Relation-model configuration for graph-variable preparation.
        eval_opt_rel = {
            'imp_model': opt.imp_model,
            'spa_model': opt.spa_model,
            'sem_model': opt.sem_model,
            "graph_att": opt.graph_attention
        }
        pos_emb_var, spa_adj_matrix, sem_adj_matrix = prepare_graph_variables(
            opt.relation_type, proposals[:, :, :4], sem_adj_matrix,
            spa_adj_matrix, opt.nongt_dim, opt.imp_pos_emb_dim,
            opt.spa_label_num, opt.sem_label_num, eval_opt_rel)

        eval_opt = {
            'sample_max': 1,
            'beam_size': opt.beam_size,
            'inference_mode': True,
            'tag_size': opt.cbs_tag_size
        }
        (seq, bn_seq, fg_seq, seqLogprobs, bnLogprobs, fgLogprobs,
         attention_weights) = fusion_beam_sample(
            opt, imp_pro, spa_pro, sem_pro, input_ppls, input_imgs,
            input_num, pos_emb_var, spa_adj_matrix, sem_adj_matrix,
            eval_opt, imp_model, spa_model, sem_model)

        sents = utils.decode_sequence(dataset_val.itow, dataset_val.itod,
                                      dataset_val.ltow, dataset_val.itoc,
                                      dataset_val.wtod, seq.data,
                                      bn_seq.data, fg_seq.data,
                                      opt.vocab_size, opt)

        for k, sent in enumerate(sents):
            entry = {'image_id': img_id[k].item(), 'caption': sent}
            predictions.append(entry)
            # Echo only the first 20 captions to keep the log readable.
            if num_show < 20:
                print('image %s: %s' % (entry['image_id'], entry['caption']))
                num_show += 1

        # Optionally dump per-image attention maps for visualization.
        if opt.graph_attention:
            for k in range(len(img_id)):
                save_attention(img_id[k], attention_weights[k],
                               opt.att_weight_save)

    print('Total image to be evaluated %d' % (len(predictions)))

    lang_stats = None
    if opt.language_eval == 1:
        if opt.decode_noc:
            lang_stats = utils.noc_eval(predictions, str(1), opt.val_split, opt)
        else:
            lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                             opt.val_split, opt)

    print('Saving the predictions')
    return lang_stats, predictions