Example no. 1

import os
import pickle

import torch
import yaml

# BLEU, METEOR, CIDEr, ROUGE, SPICE and BERT are assumed to come from local
# metric modules that this snippet does not show; the imports above cover the
# standard-library and third-party names used below.
class Evaluator:  # enclosing class name is an assumption; the source shows only this __init__

    def __init__(self, keras_model, dataset_provider, test_dataset_provider):
        self._model = keras_model
        self._dataset_provider = test_dataset_provider
        # The preprocessor comes from the train-time provider even though the
        # stored provider is the test one; kept as in the source.
        self._preprocessor = dataset_provider.caption_preprocessor
        self._metrics = [BLEU(4), METEOR(), CIDEr(), ROUGE(), SPICE()]  # BERT() not enabled here
        self._max_caption_length = 20
        self._beam_size = 1
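
# Every metric object used in this file follows the same contract (inferred
# from the metric.calculate(...) calls below): it consumes two dicts mapping
# an id to a list of {'caption': str} entries and returns a
# {metric_name: value} dict. A minimal sketch of that assumed interface; the
# class name CaptionMetric is hypothetical:
class CaptionMetric:

    def calculate(self, id_to_prediction, id_to_references):
        # id_to_prediction: {image_id: [{'caption': '...'}, ...]}  (hypotheses)
        # id_to_references: {image_id: [{'caption': '...'}, ...]}  (ground truth)
        # Concrete metrics return e.g. {'bleu_4': 0.31} or {'cider': 0.95}.
        raise NotImplementedError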
def calculate_metrics(id_to_prediction_path, id_to_references_path,
                      model_name):
    # Predictions were previously pickled; they are now stored as YAML.
    with open(id_to_prediction_path, 'r') as f:
        id_to_prediction = yaml.safe_load(f)
    with open(id_to_references_path, 'rb') as f:
        id_to_references = pickle.load(f)
    metrics_list = [BLEU(4), METEOR(), CIDEr(), ROUGE(), SPICE(), BERT()]
    metrics_value = {}
    for metric in metrics_list:
        metric_name_to_value = metric.calculate(id_to_prediction,
                                                id_to_references)
        metrics_value.update(metric_name_to_value)

    save_path = './results/Flickr30KfusedFashion1101/evaluation-results/'
    name = os.path.join(
        save_path, 'test-fused-metrics-{}-{}-{}-cloth_unbalance.yaml'.format(
            3, 20, model_name))
    write_yaml_file(metrics_value, name)
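
# write_yaml_file is called above but not defined in this snippet. A plausible
# stand-in, consistent with the yaml.safe_dump calls used later in this file
# (hypothetical helper, shown for completeness only):
def write_yaml_file(data, path):
    # Dump a plain dict (e.g. metric name -> score) as YAML.
    with open(path, 'w') as f:
        yaml.safe_dump(data, f)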
def validate(val_loader,
             model,
             word_map,
             args,
             start_epoch,
             beam_search_type='dbs',
             beam_size=3):
    model.eval()
    rev_word_map = {v: k for k, v in word_map.items()}

    with torch.no_grad():
        references = {}  # references (true captions) for calculating BLEU-4 score
        hypotheses = {}  # hypotheses (predictions)
        # Each image may have multiple predictions, so a second pair of dicts,
        # keyed by filename, collects all captions for one image.
        prediction_save = {}
        gt_save = {}
        image_id = 0
        for i, (imgs, allcaps, caplens,
                img_filenames) in enumerate(val_loader):
            imgs = imgs.cuda()
            if beam_search_type == 'dbs':
                sentences = model.diverse_beam_search(imgs, beam_size,
                                                      word_map)
            elif beam_search_type == 'beam_search':
                sentences, _ = model.beam_search(imgs,
                                                 word_map,
                                                 beam_size=beam_size)
            elif beam_search_type == 'greedy':
                sentences, _ = model.greedy_search(imgs, word_map)
            else:
                raise NotImplementedError(
                    "beam_search_type must be one of 'dbs', 'beam_search' or 'greedy'"
                )
            # assert len(sentences) == batch_size
            img_filename = img_filenames[0]
            if img_filename not in prediction_save:
                prediction_save[img_filename] = []
                gt_save[img_filename] = []
            for sentence in sentences:
                if image_id not in hypotheses:
                    hypotheses[image_id] = []
                    references[image_id] = []
                hypotheses[image_id].append({'caption': sentence})
                prediction_save[img_filename].append(sentence)
                for ref_item in allcaps[0]:
                    # Drop special tokens before decoding the reference.
                    enc_ref = [
                        w.item() for w in ref_item if w.item() not in {
                            word_map['<start>'], word_map['<end>'],
                            word_map['<pad>'], word_map['<unk>']
                        }
                    ]
                    ref = ' '.join(rev_word_map[w] for w in enc_ref)
                    if ref not in gt_save[img_filename]:
                        gt_save[img_filename].append(ref)
                    references[image_id].append({'caption': ref})
                image_id += 1
    results_dict = {}
    print("Calculating Evalaution Metric Scores......\n")
    avg_bleu_dict = BLEU().calculate(hypotheses, references)
    bleu4 = avg_bleu_dict['bleu_4']
    avg_cider_dict = CIDEr().calculate(hypotheses, references)
    cider = avg_cider_dict['cider']
    avg_bert_dict = BERT().calculate(hypotheses, references)
    bert = avg_bert_dict['bert']
    avg_spice_dict = SPICE().calculate(hypotheses, references)
    avg_rouge_dict = ROUGE().calculate(hypotheses, references)
    avg_meteor_dict = METEOR().calculate(hypotheses, references)
    print(
        f'Evaluation results, BLEU-4: {bleu4}, CIDEr: {cider}, SPICE: {avg_spice_dict["spice"]}, ROUGE: {avg_rouge_dict["rouge"]}'
    )
    results_dict.update(avg_bert_dict)
    results_dict.update(avg_bleu_dict)
    results_dict.update(avg_cider_dict)
    results_dict.update(avg_rouge_dict)
    results_dict.update(avg_spice_dict)
    results_dict.update(avg_meteor_dict)

    # write the predictions and ground truth to files
    prediction_filename = f'predictions_{args.dataset}_split_{args.test_split}_{beam_search_type}_{beam_size}_epoch{start_epoch}.yaml'
    gt_filename = f'reference_{args.dataset}_split_{args.test_split}_{beam_search_type}_{beam_size}.yaml'
    with open(
            os.path.join(args.save_path, args.encoder, args.dataset,
                         prediction_filename), 'w') as f:
        yaml.safe_dump(prediction_save, f)
    with open(
            os.path.join(args.save_path, args.encoder, args.dataset,
                         gt_filename), 'w') as f:
        yaml.safe_dump(gt_save, f)
    metrics_filename = f'metrics_{args.dataset}_split_{args.test_split}_{beam_search_type}_{beam_size}_epoch{start_epoch}.yaml'
    # write the evaluation metrics to a file
    with open(
            os.path.join(args.save_path, args.encoder, args.dataset,
                         metrics_filename), 'w') as f:
        yaml.safe_dump(results_dict, f)
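
# A usage sketch for the validate() above (hypothetical driver, not part of
# the source; a second variant of validate with a different signature follows
# below). args must provide save_path, encoder, dataset and test_split, which
# validate() reads when writing its YAML outputs:
def run_validation(model, val_loader, word_map, args, start_epoch):
    # Evaluate with each decoding strategy supported by validate().
    for strategy, k in [('greedy', 1), ('beam_search', 3), ('dbs', 3)]:
        validate(val_loader, model, word_map, args, start_epoch,
                 beam_search_type=strategy, beam_size=k)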
def validate(val_loader,
             model,
             word_map,
             beam_size,
             epoch,
             beam_search_type='greedy'):
    model.eval()
    rev_word_map = {v: k for k, v in word_map.items()}
    with torch.no_grad():
        references = {}  # references (true captions) for calculating BLEU-4 score
        hypotheses = {}  # hypotheses (predictions)
        # Each image may have multiple predictions, so a second pair of dicts,
        # keyed by filename, collects all captions for one image.
        prediction_save = {}
        gt_save = {}
        image_id = 0
        for i, (imgs, allcaps, caplens,
                img_filenames) in enumerate(val_loader):
            imgs = imgs.cuda()
            if beam_search_type == 'dbs':
                sentences = model.diverse_beam_search(imgs, beam_size,
                                                      word_map)
            elif beam_search_type == 'beam_search':
                sentences, _ = model.beam_search(imgs,
                                                 word_map,
                                                 beam_size=beam_size)
            elif beam_search_type == 'greedy':
                sentences, _ = model.greedy_search(imgs, word_map)
            else:
                raise NotImplementedError(
                    "beam_search_type must be one of 'dbs', 'beam_search' or 'greedy'"
                )
            # assert len(sentences) == batch_size
            img_filename = img_filenames[0]
            if img_filename not in prediction_save:
                prediction_save[img_filename] = []
                gt_save[img_filename] = []
            for sentence in sentences:
                if image_id not in hypotheses:
                    hypotheses[image_id] = []
                    references[image_id] = []
                hypotheses[image_id].append({'caption': sentence})
                prediction_save[img_filename].append(sentence)
                for ref_item in allcaps[0]:
                    # Drop special tokens before decoding the reference.
                    enc_ref = [
                        w.item() for w in ref_item if w.item() not in {
                            word_map['<start>'], word_map['<end>'],
                            word_map['<pad>'], word_map['<unk>']
                        }
                    ]
                    ref = ' '.join(rev_word_map[w] for w in enc_ref)
                    if ref not in gt_save[img_filename]:
                        gt_save[img_filename].append(ref)
                    references[image_id].append({'caption': ref})
                image_id += 1
    print("Calculating Evalaution Metric Scores......\n")
    avg_bleu_dict = BLEU().calculate(hypotheses, references)
    bleu4 = avg_bleu_dict['bleu_4']
    avg_cider_dict = CIDEr().calculate(hypotheses, references)
    cider = avg_cider_dict['cider']
    avg_spice_dict = SPICE().calculate(hypotheses, references)
    avg_rouge_dict = ROUGE().calculate(hypotheses, references)

    print(
        f'Evaluation results at Epoch {epoch}, BLEU-4: {bleu4}, CIDEr: {cider}, SPICE: {avg_spice_dict["spice"]}, ROUGE: {avg_rouge_dict["rouge"]}'
    )
    return bleu4, cider
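
# Because this variant returns (bleu4, cider), it can drive checkpoint
# selection during training. A minimal sketch of that loop; train_one_epoch
# and the checkpoint path are hypothetical:
def train_with_validation(model, train_loader, val_loader, word_map, epochs,
                          beam_size=3):
    best_cider = 0.0
    for epoch in range(epochs):
        # train_one_epoch(model, train_loader)  # training step, not shown
        bleu4, cider = validate(val_loader, model, word_map, beam_size, epoch)
        if cider > best_cider:
            best_cider = cider
            torch.save(model.state_dict(), f'best_model_epoch{epoch}.pth')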