# Example #1
def CocoScore(ref, hyp, metrics_list=None, language='en'):
    """
    Obtains the COCO scores from the references and hypotheses.

    Metric names are matched case-insensitively. Recognised names are
    'bleu', 'meteor', 'ter', 'rouge_l' (or 'rouge') and 'cider'; any other
    name is ignored.

    :param ref: Dictionary of reference sentences (id, sentence)
    :param hyp: Dictionary of hypothesis sentences (id, sentence)
    :param metrics_list: List of metrics to evaluate on. If None, all the
            recognised metrics are computed.
    :param language: Language of the sentences (for METEOR)
    :return: dictionary of scores, keyed by "Bleu_1".."Bleu_4", "METEOR",
            "TER", "ROUGE_L" and "CIDEr" (only for the requested metrics)
    """
    if metrics_list is None:
        requested = {'bleu', 'ter', 'meteor', 'rouge_l', 'cider'}
    else:
        # Normalise the caller's spelling so 'BLEU' and 'bleu' are equivalent.
        requested = {metric.lower() for metric in metrics_list}

    # Scorers are only instantiated when requested.
    scorers = []
    if 'bleu' in requested:
        scorers.append((Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]))
    if 'meteor' in requested:
        scorers.append((Meteor(language), "METEOR"))
    if 'ter' in requested:
        scorers.append((Ter(), "TER"))
    if 'rouge_l' in requested or 'rouge' in requested:
        scorers.append((Rouge(), "ROUGE_L"))
    if 'cider' in requested:
        scorers.append((Cider(), "CIDEr"))

    final_scores = {}
    for scorer, method in scorers:
        score, _ = scorer.compute_score(ref, hyp)
        if isinstance(score, list):
            # A list score is paired with a list of names (e.g. Bleu_1..Bleu_4).
            for name, value in zip(method, score):
                final_scores[name] = value
        else:
            final_scores[method] = score
    return final_scores
# Example #2
def CocoScore(ref, hypo, language='en'):
    """
    Obtains the COCO scores from the references and hypotheses.

    BLEU (1 to 4), METEOR, TER, ROUGE_L and CIDEr are always computed.

    :param ref: Dictionary of reference sentences (id, sentence)
    :param hypo: Dictionary of hypothesis sentences (id, sentence)
    :param language: Language of the sentences (for METEOR)
    :return: dictionary of scores, keyed by "Bleu_1".."Bleu_4", "METEOR",
            "TER", "ROUGE_L" and "CIDEr"
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(language), "METEOR"),
        (Ter(), "TER"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    final_scores = {}
    for scorer, method in scorers:
        # Only the first element of the returned pair is used here.
        score, _ = scorer.compute_score(ref, hypo)
        if isinstance(score, list):
            # A list score is paired with a list of names (e.g. Bleu_1..Bleu_4).
            for name, value in zip(method, score):
                final_scores[name] = value
        else:
            final_scores[method] = score
    return final_scores
# Example #3
def get_coco_score(pred_list, verbose, extra_vars, split, **kwargs):
    """
    COCO challenge metrics

    :param pred_list: iterable of hypothesis sentences (strings); the position of each
            sentence in the iteration is used as its sample id
    :param verbose: if greater than 0, a header line naming the split is logged
            (the per-metric values are logged regardless)
    :param extra_vars: extra variables, here are:
            extra_vars[split]['references'] - dict mapping sample indices to list with all valid captions (id, [sentences])
            extra_vars['tokenize_f'] - tokenization function used during model training (used again for validation)
            extra_vars['detokenize_f'] - detokenization function used during model training (used again for validation)
            extra_vars['tokenize_hypotheses'] - Whether tokenize or not the hypotheses during evaluation
            extra_vars['tokenize_references'] - Whether tokenize or not the references during evaluation
            extra_vars['apply_detokenization'] - Whether detokenize or not the references during evaluation
            extra_vars['language'] - language passed to METEOR (default 'en'); METEOR is skipped
                                     when the language is not in pycocoevalcap's accepted_langs
    :param split: split on which we are evaluating
    :param kwargs: accepted for call compatibility; not used by this function
    :return: Dictionary with the coco scores
    """
    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.meteor.meteor import Meteor
    from pycocoevalcap.meteor import accepted_langs
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge
    from pycocoevalcap.ter.ter import Ter

    gts = extra_vars[split]['references']

    # Each hypothesis is wrapped in a one-element list, mirroring the
    # {id: [sentences]} layout of the references.
    if extra_vars.get('tokenize_hypotheses', False):
        tokenize = extra_vars['tokenize_f']
        hypo = {idx: [tokenize(line.strip())] for idx, line in enumerate(pred_list)}
    else:
        hypo = {idx: [line.strip()] for idx, line in enumerate(pred_list)}

    # Tokenize references if needed
    if extra_vars.get('tokenize_references', False):
        tokenize = extra_vars['tokenize_f']
        refs = {idx: [tokenize(sentence) for sentence in gts[idx]] for idx in gts}
    else:
        refs = gts

    # Detokenize references if needed.
    if extra_vars.get('apply_detokenization', False):
        detokenize = extra_vars['detokenize_f']
        refs = {idx: [detokenize(sentence) for sentence in refs[idx]] for idx in refs}

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Ter(), "TER"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    # METEOR is only added for the languages it accepts.
    language = extra_vars.get('language', 'en')
    if language in accepted_langs:
        scorers.append((Meteor(language=language), "METEOR"))

    final_scores = {}
    for scorer, method in scorers:
        score, _ = scorer.compute_score(refs, hypo)
        if isinstance(score, list):
            # A list score is paired with a list of names (e.g. Bleu_1..Bleu_4).
            for name, value in zip(method, score):
                final_scores[name] = value
        else:
            final_scores[method] = score

    if verbose > 0:
        logger.info('Computing coco scores on the %s split...', split)
    for metric in sorted(final_scores):
        logger.info('%s: %s', metric, final_scores[metric])

    return final_scores
def get_coco_score(pred_list, verbose, extra_vars, split):
    """
    COCO challenge metrics

    :param pred_list: iterable of hypothesis sentences (strings); the position of each
            sentence in the iteration is used as its sample id
    :param verbose: if greater than 0, a header line naming the split is logged
            (the per-metric values are logged regardless)
    :param extra_vars: extra variables, here are:
            extra_vars[split]['references'] - dict mapping sample indices to list with all valid captions (id, [sentences])
            extra_vars['tokenize_f'] - tokenization function used during model training (used again for validation)
            extra_vars['detokenize_f'] - detokenization function used during model training (used again for validation)
            extra_vars['tokenize_hypotheses'] - Whether tokenize or not the hypotheses during evaluation
                                                (a flag, or a list whose first element is the flag)
            extra_vars['tokenize_references'] - Whether tokenize or not the references during evaluation
                                                (a flag, or a list whose first element is the flag)
            extra_vars['apply_detokenization_ref'] - Whether detokenize or not the references during evaluation
            extra_vars['language'] - language passed to METEOR (default 'en'); METEOR is skipped
                                     when the language is not in pycocoevalcap's accepted_langs
    :param split: split on which we are evaluating
    :return: Dictionary with the coco scores
    """
    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.meteor.meteor import Meteor
    from pycocoevalcap.meteor import accepted_langs
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge
    from pycocoevalcap.ter.ter import Ter

    gts = extra_vars[split]['references']

    # Each hypothesis is wrapped in a one-element list, mirroring the
    # {id: [sentences]} layout of the references.
    if _flag_enabled(extra_vars.get('tokenize_hypotheses', False)):
        tokenize = extra_vars['tokenize_f']
        hypo = {idx: [tokenize(line.strip())] for idx, line in enumerate(pred_list)}
    else:
        hypo = {idx: [line.strip()] for idx, line in enumerate(pred_list)}

    # Tokenize references if needed
    if _flag_enabled(extra_vars.get('tokenize_references', False)):
        tokenize = extra_vars['tokenize_f']
        refs = {idx: [tokenize(sentence) for sentence in gts[idx]] for idx in gts}
    else:
        refs = gts

    # Detokenize references if needed.
    # Hypotheses are already detokenized in callbacks.py
    if extra_vars.get('apply_detokenization_ref', False):
        detokenize = extra_vars['detokenize_f']
        refs = {idx: [detokenize(sentence) for sentence in refs[idx]] for idx in refs}

    # Preview of the first few scorer inputs.
    print("Hypotheses = ", list(hypo.values())[:5])
    print("References = ", list(refs.values())[:5])

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Ter(), "TER"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    # METEOR is only added for the languages it accepts.
    language = extra_vars.get('language', 'en')
    if language in accepted_langs:
        scorers.append((Meteor(language=language), "METEOR"))

    final_scores = {}
    for scorer, method in scorers:
        score, _ = scorer.compute_score(refs, hypo)
        if isinstance(score, list):
            # A list score is paired with a list of names (e.g. Bleu_1..Bleu_4).
            for name, value in zip(method, score):
                final_scores[name] = value
        else:
            final_scores[method] = score

    if verbose > 0:
        logging.info('Computing coco scores on the %s split...', split)
    for metric in sorted(final_scores):
        logging.info('%s: %s', metric, final_scores[metric])

    return final_scores


def _flag_enabled(value):
    """Return the truth value of a flag given either directly or as the first item of a list."""
    if isinstance(value, list):
        # An empty list carries no flag at all, so it counts as disabled.
        return bool(value[0]) if value else False
    return bool(value)