def main(args):
    '''
    args: pre-set parameters
    '''
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load trained model
    model = Encoder2Decoder(args.embed_size, len(vocab), args.hidden_size)
    model.load_state_dict(torch.load(args.trained))

    # Change to GPU mode if available
    if torch.cuda.is_available():
        model.cuda()

    model.eval()

    transform = transforms.Compose([
        transforms.Resize((args.crop_size, args.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Wrap the COCO VAL dataset
    eval_data_loader = torch.utils.data.DataLoader(
        CocoEvalLoader(args.image_dir, args.caption_test_path,
                       args.topic_path, transform),
        batch_size=args.eval_size,
        shuffle=False,
        num_workers=args.num_workers,
        drop_last=False)

    # Recover the epoch number from the checkpoint file name
    epoch = int(args.trained.split('/')[-1].split('-')[1].split('.')[0])

    # Generated captions to be compared with GT
    results = []
    print '---------------------Start evaluation on MS-COCO dataset-----------------------'

    for i, (images, image_ids, _, T_val) in enumerate(eval_data_loader):

        images = to_var(images)
        T_val = to_var(T_val)

        generated_captions = model.sampler(epoch, images, T_val)

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range(captions.shape[0]):

            sampled_ids = captions[image_idx]
            sampled_caption = []

            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)

            sentence = ' '.join(sampled_caption)

            temp = {'image_id': int(image_ids[image_idx]), 'caption': sentence}
            results.append(temp)

        # Display evaluation progress
        if (i + 1) % 10 == 0:
            print '[%d/%d]' % ((i + 1), len(eval_data_loader))

    print '------------------------Caption Generated-------------------------------------'

    # Evaluate the results based on the COCO API
    resFile = args.save_path
    json.dump(results, open(resFile, 'w'))

    annFile = args.caption_test_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    print '-----------Evaluation performance on MS-COCO dataset----------'
    for metric, score in cocoEval.eval.items():
        print '%s: %.4f' % (metric, score)
def coco_eval(model, args, epoch):
    '''
    model: trained model to be evaluated
    args: pre-set parameters
    epoch: epoch number, for display purposes
    '''
    model.eval()

    # Resize validation images to crop_size x crop_size and normalize with ImageNet statistics
    transform = transforms.Compose([
        transforms.Scale((args.crop_size, args.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    # Load the vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Wrap the COCO VAL dataset
    eval_data_loader = torch.utils.data.DataLoader(
        CocoEvalLoader(args.image_dir, args.caption_val_path, transform),
        batch_size=args.eval_size,
        shuffle=False,
        num_workers=args.num_workers,
        drop_last=False)

    # Generated captions to be compared with GT
    results = []
    print '---------------------Start evaluation on MS-COCO dataset-----------------------'

    for i, (images, image_ids, _) in enumerate(eval_data_loader):

        images = to_var(images)
        generated_captions, _, _ = model.sampler(images)

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range(captions.shape[0]):

            sampled_ids = captions[image_idx]
            sampled_caption = []

            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)

            sentence = ' '.join(sampled_caption)

            temp = {'image_id': int(image_ids[image_idx]), 'caption': sentence}
            results.append(temp)

        # Display evaluation progress
        if (i + 1) % 10 == 0:
            print '[%d/%d]' % ((i + 1), len(eval_data_loader))

    print '------------------------Caption Generated-------------------------------------'

    # Evaluate the results based on the COCO API
    resFile = 'results/mixed-' + str(epoch) + '.json'
    json.dump(results, open(resFile, 'w'))

    annFile = args.caption_val_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # Get CIDEr score for validation evaluation
    cider = 0.
    print '-----------Evaluation performance on MS-COCO validation dataset for Epoch %d----------' % (epoch)
    for metric, score in cocoEval.eval.items():
        print '%s: %.4f' % (metric, score)
        if metric == 'CIDEr':
            cider = score

    return cider
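# A minimal sketch (an assumption, not part of the original code) of how the CIDEr
# score returned by coco_eval() could drive best-checkpoint selection during
# training. 'train_one_epoch', 'num_epochs', and the save path are hypothetical.
def validate_and_track_best(model, args, num_epochs, train_one_epoch):
    best_cider, best_epoch = 0., 0
    for epoch in range(1, num_epochs + 1):
        train_one_epoch(model, args, epoch)      # caller-supplied training step
        cider = coco_eval(model, args, epoch)    # also writes results/mixed-<epoch>.json
        if cider > best_cider:                   # keep the weights with the best CIDEr so far
            best_cider, best_epoch = cider, epoch
            torch.save(model.state_dict(), 'models/best-candidate.pkl')
    return best_epoch, best_cider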
def main(args):
    '''
    args: pre-set parameters
    '''
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Create results directory
    if not os.path.exists(os.path.join(args.result_path, args.basic_model)):
        os.makedirs(os.path.join(args.result_path, args.basic_model))

    # List and sort all checkpoints in the storage directory
    if args.use_MIA:
        checkpoint_dir = os.path.join(args.save_dir_path, args.basic_model + "-MIA")
    else:
        checkpoint_dir = os.path.join(args.save_dir_path, args.basic_model)

    checkpoint_list = os.listdir(checkpoint_dir)
    checkpoint_list.sort()

    # Load the caption model from each checkpoint and evaluate it
    for checkpoint in checkpoint_list:

        checkpoint_path = os.path.join(checkpoint_dir, checkpoint)
        Caption_Generator = Generator(args, checkpoint_path, len(vocab))

        transform = transforms.Compose([
            transforms.Resize((args.crop_size, args.crop_size)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ])

        # Wrap the COCO VAL dataset
        eval_data_loader = torch.utils.data.DataLoader(
            CocoEvalLoader(args.image_dir, args.caption_test_path,
                           args.concept_path, vocab, transform),
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        # Recover the epoch number from the checkpoint file name
        epoch = int(checkpoint.split('-')[1].split('.')[0])

        # Generated captions to be compared with GT
        results = []
        print '---------------------Start evaluation on MS-COCO dataset-----------------------'

        for i, (images, image_concepts, image_ids, _) in enumerate(eval_data_loader):

            images = to_var(images)
            image_concepts = to_var(image_concepts)

            all_hyp, all_scores = Caption_Generator.translate_batch(images, image_concepts)

            # Build caption based on Vocabulary and the '<end>' token
            for image_idx in range(len(all_hyp)):

                all_sentence = []
                for num_i in range(args.n_best):

                    sampled_ids = all_hyp[image_idx][num_i]
                    sampled_caption = []

                    for word_id in sampled_ids:
                        word = vocab.idx2word[word_id]
                        if word == '<end>':
                            break
                        else:
                            sampled_caption.append(word)

                    sentence = ' '.join(sampled_caption)
                    all_sentence.append(sentence)

                best_sentence = all_sentence[0]

                temp = {'image_id': int(image_ids[image_idx]), 'caption': best_sentence}
                results.append(temp)

            # Display evaluation progress
            if (i + 1) % (1000 / args.eval_batch_size) == 0:
                print '[%d/%d]' % ((i + 1), len(eval_data_loader))

        print '------------------------Caption Generated-------------------------------------'

        # Evaluate the results based on the COCO API
        resFile = os.path.join(args.result_path, args.basic_model, 'Caption-%d.json' % (epoch))
        json.dump(results, open(resFile, 'w'))

        annFile = args.caption_test_path
        coco = COCO(annFile)
        cocoRes = coco.loadRes(resFile)

        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

        print '-----------Evaluation performance on MS-COCO dataset----------'

        if args.use_MIA:
            save_file = args.save_score_file + '-' + args.basic_model + "-MIA"
        else:
            save_file = args.save_score_file + '-' + args.basic_model

        f = open(save_file, 'a')
        f.write('\n The evaluation scores for epoch %d are: \n' % (epoch))

        for metric, score in cocoEval.eval.items():
            f.write('\n%s: %.4f\n' % (metric, score))
            print '%s: %.4f' % (metric, score)

        f.close()
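# A minimal sketch (an assumption, not from the original repository) of the
# command-line interface that main() above expects. The flag names mirror the
# attributes the function reads; every default value is a guess, and Generator()
# may require additional arguments that are not listed here.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl')
    parser.add_argument('--image_dir', type=str, default='data/images')
    parser.add_argument('--caption_test_path', type=str,
                        default='data/annotations/captions_val2014.json')
    parser.add_argument('--concept_path', type=str, default='data/image_concepts.json')
    parser.add_argument('--save_dir_path', type=str, default='checkpoints')
    parser.add_argument('--result_path', type=str, default='results')
    parser.add_argument('--save_score_file', type=str, default='evaluation_scores')
    parser.add_argument('--basic_model', type=str, default='basic_model')
    parser.add_argument('--use_MIA', action='store_true')
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--eval_batch_size', type=int, default=20)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--n_best', type=int, default=1)
    args = parser.parse_args()
    main(args)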