Code Example #1
def save_data(summaries, metrics, output_dir: str):
    for language in summaries.keys():
        code = LANGUAGE_CODES[language]

        with JsonlWriter(
                f'{output_dir}/{code}.summaries.jsonl') as out_summaries:
            with JsonlWriter(
                    f'{output_dir}/{code}.metrics.jsonl') as out_metrics:
                for instance_id in sorted(summaries[language].keys()):
                    for summarizer_id in sorted(
                            summaries[language][instance_id].keys()):
                        summary = summaries[language][instance_id][
                            summarizer_id]
                        instance_metrics = metrics[language][instance_id][
                            summarizer_id]
                        references = get_references(summaries[language],
                                                    instance_id, summarizer_id)

                        out_summaries.write({
                            'instance_id': instance_id,
                            'summarizer_id': summarizer_id,
                            'summarizer_type': summary['summarizer_type'],
                            'summary': summary,
                            'references': references
                        })
                        out_metrics.write(
                            Metrics(instance_id, summarizer_id,
                                    summary['summarizer_type'],
                                    instance_metrics))
Code Example #2
def get_initial_micro_list(instances: List[EvalInstance]) -> List[Metrics]:
    micro_list = []
    for instance in instances:
        micro_list.append(
            Metrics(instance.instance_id, instance.summarizer_id,
                    instance.summarizer_type))
    return micro_list
Code Example #3
def save_metrics(summaries: Dict[str, Dict[str, List[str]]],
                 metrics: Dict[str, Dict[str, List[int]]], output_dir: str):
    with JsonlWriter(f'{output_dir}/task1.summaries.jsonl') as out_summaries:
        with JsonlWriter(f'{output_dir}/task1.metrics.jsonl') as out_metrics:
            for instance_id in sorted(summaries.keys()):
                for summarizer_id in summaries[instance_id].keys():
                    summary = summaries[instance_id][summarizer_id]
                    instance_metrics = metrics[instance_id][summarizer_id]
                    references = get_references(summaries, instance_id,
                                                summarizer_id)

                    out_summaries.write({
                        'instance_id': instance_id,
                        'summarizer_id': summarizer_id,
                        'summarizer_type': summary['summarizer_type'],
                        'summary': summary,
                        'references': references
                    })
                    out_metrics.write(
                        Metrics(instance_id, summarizer_id,
                                summary['summarizer_type'], instance_metrics))
Code Example #4
def _load_generic_scores(input_file: str):
    data = json.load(open(input_file, 'r'))
    instances = []
    metrics_list = []
    documents = {}
    for i, instance in enumerate(data):
        document = instance['text'].strip()
        summary = instance['summary'].strip()
        scores = instance['scores']
        summarizer_id = str(i)

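        # Assign a new sequential instance_id the first time a document is seen so that all
        # summaries of the same document share an instance_id.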
        if document not in documents:
            documents[document] = str(len(documents))
        instance_id = documents[document]

        instances.append({
            'instance_id': instance_id,
            'summarizer_id': summarizer_id,
            'summarizer_type': 'peer',
            'summary': {
                'text': summary
            },
            'document': {
                'text': document
            }
        })
        metrics_list.append(
            Metrics(instance_id, summarizer_id, 'peer',
                    MetricsDict({'generic_quality': scores})))
    return instances, metrics_list
Code Example #5
File: metrics.py | Project: ibeltagy/sacrerouge
def save_mds_metrics(summaries: Dict[str, Dict[str, List[str]]],
                     metrics: Dict[str, Dict[str, List[int]]], task_name: str,
                     output_dir: str):
    for length in ['10', '50', '100', '200']:
        with JsonlWriter(f'{output_dir}/{task_name}.{length}.summaries.jsonl'
                         ) as out_summaries:
            with JsonlWriter(f'{output_dir}/{task_name}.{length}.metrics.jsonl'
                             ) as out_metrics:
                for instance_id in sorted(summaries.keys()):
                    for summarizer_id in summaries[instance_id][length].keys():
                        summary = summaries[instance_id][length][summarizer_id]
                        instance_metrics = metrics[instance_id][length][
                            summarizer_id]
                        if len(instance_metrics) == 0:
                            continue

                        references = get_mds_references(
                            summaries, instance_id, length, summarizer_id)

                        out_summaries.write({
                            'instance_id': instance_id,
                            'summarizer_id': summarizer_id,
                            'summarizer_type': summary['summarizer_type'],
                            'summary': summary,
                            'references': references
                        })
                        out_metrics.write(
                            Metrics(instance_id, summarizer_id,
                                    summary['summarizer_type'],
                                    instance_metrics))
Code Example #6
def main(args):
    random.seed(args.random_seed)

    instances = JsonlReader(args.input_jsonl, Metrics).read()
    mapping = map_summarizer_id_to_results(instances)

    # Pick the specific references sampled for each instance_id and use those same references to score
    # all of the peer summaries
    instance_to_references = get_instance_id_to_references(instances)
    sampled_references = sample_references(instance_to_references,
                                           args.num_references)

    with JsonlWriter(args.output_file) as out:
        for (instance_id, summarizer_id,
             summarizer_type), metrics_dict in mapping.items():
            reference_ids = sampled_references[instance_id]
            metrics = sum(
                [metrics_dict[reference_id]
                 for reference_id in reference_ids]) / len(reference_ids)
            # Hacky: APES_num_correct should be the sum, not the average, so undo the division
            if 'APES' in metrics:
                metrics['APES']['num_correct'] *= len(reference_ids)

            out.write(
                Metrics(instance_id, summarizer_id, summarizer_type, metrics))
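The sum(...) / len(reference_ids) expression above implies that MetricsDict values support elementwise addition and scalar division, which is what makes averaging over the sampled references a one-liner. A small sketch of that behavior with made-up scores; the import path, metric names, and values are illustrative assumptions:

from sacrerouge.data import MetricsDict  # import path assumed

d1 = MetricsDict({'rouge-1': {'recall': 0.40}, 'APES': {'num_correct': 2}})
d2 = MetricsDict({'rouge-1': {'recall': 0.60}, 'APES': {'num_correct': 4}})

# Elementwise average, mirroring the sum(...) / len(reference_ids) call in main() above.
avg = sum([d1, d2]) / 2
# avg['rouge-1']['recall'] is now 0.5, but avg['APES']['num_correct'] is 3.0,
# which is why main() multiplies the APES num_correct back by len(reference_ids).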
Code Example #7
def _get_initial_metrics_dicts(
        instances: List[EvalInstance]) -> Dict[str, Dict[str, Metrics]]:
    metrics_dicts = defaultdict(dict)
    for instance in instances:
        metrics = Metrics(instance.instance_id, instance.summarizer_id,
                          instance.summarizer_type)
        metrics_dicts[instance.instance_id][instance.summarizer_id] = metrics
    return metrics_dicts
Code Example #8
def save_summaries_and_metrics(summaries, metrics, output_dir: str):
    with JsonlWriter(f'{output_dir}/task1.A-B.summaries.jsonl') as out_summaries_A_B:
        with JsonlWriter(f'{output_dir}/task1.A.summaries.jsonl') as out_summaries_A:
            with JsonlWriter(f'{output_dir}/task1.B.summaries.jsonl') as out_summaries_B:
                with JsonlWriter(f'{output_dir}/task1.A-B.metrics.jsonl') as out_metrics_A_B:
                    with JsonlWriter(f'{output_dir}/task1.A.metrics.jsonl') as out_metrics_A:
                        with JsonlWriter(f'{output_dir}/task1.B.metrics.jsonl') as out_metrics_B:
                            for instance_id in sorted(summaries.keys()):
                                for summarizer_id in sorted(summaries[instance_id].keys()):
                                    summary_A = summaries[instance_id][summarizer_id]['A']
                                    summary_B = summaries[instance_id][summarizer_id]['B']

                                    references_A = get_references(summaries, instance_id, summarizer_id, 'A')
                                    references_B = get_references(summaries, instance_id, summarizer_id, 'B')

                                    metrics_A = metrics[instance_id]['A'][summarizer_id]
                                    metrics_B = metrics[instance_id]['B'][summarizer_id]

                                    summary_instance_A = {
                                        'instance_id': f'{instance_id}-A',
                                        'summarizer_id': summarizer_id,
                                        'summarizer_type': summary_A['summarizer_type'],
                                        'summary': summary_A,
                                        'references': references_A,
                                    }
                                    summary_instance_B = {
                                        'instance_id': f'{instance_id}-B',
                                        'summarizer_id': summarizer_id,
                                        'summarizer_type': summary_B['summarizer_type'],
                                        'summary': summary_B,
                                        'references': references_B,
                                        'metrics': metrics_B
                                    }
                                    metric_instance_A = Metrics(f'{instance_id}-A', summarizer_id, summary_A['summarizer_type'], metrics_A)
                                    metric_instance_B = Metrics(f'{instance_id}-B', summarizer_id, summary_B['summarizer_type'], metrics_B)

                                    out_summaries_A_B.write(summary_instance_A)
                                    out_summaries_A_B.write(summary_instance_B)
                                    out_summaries_A.write(summary_instance_A)
                                    out_summaries_B.write(summary_instance_B)

                                    out_metrics_A_B.write(metric_instance_A)
                                    out_metrics_A_B.write(metric_instance_B)
                                    out_metrics_A.write(metric_instance_A)
                                    out_metrics_B.write(metric_instance_B)
Code Example #9
    def test_convert_to_matrices(self):
        metrics_list = [
            Metrics('1', 'A', 'peer', {
                'm1': 1,
                'm2': 2,
                'm3': 3
            }),
            Metrics('2', 'A', 'peer', {
                'm1': 4,
                'm2': 5
            }),
            Metrics('1', 'B', 'peer', {
                'm1': 6,
                'm2': 7,
                'm3': 8
            }),
            Metrics('2', 'B', 'peer', {
                'm1': 9,
                'm2': 10,
                'm3': 11
            }),
        ]
        m1 = convert_to_matrices(metrics_list, 'm1')
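        # Rows correspond to summarizer_ids ('A', 'B') and columns to instance_ids ('1', '2').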
        np.testing.assert_array_equal(m1, [[1, 4], [6, 9]])

        m1, m2 = convert_to_matrices(metrics_list, 'm1', 'm2')
        np.testing.assert_array_equal(m1, [[1, 4], [6, 9]])
        np.testing.assert_array_equal(m2, [[2, 5], [7, 10]])

        m3 = convert_to_matrices(metrics_list, 'm3')
        np.testing.assert_array_equal(m3, [[3, np.nan], [8, 11]])

        metrics_list = [
            Metrics('1', 'A', 'peer', {
                'm1': 1,
                'm2': 2
            }),
            Metrics('2', 'A', 'peer', {
                'm1': 4,
                'm2': 5
            }),
            Metrics('1', 'B', 'peer', {
                'm1': 6,
                'm2': 7
            }),
            Metrics('3', 'B', 'peer', {
                'm1': 2,
                'm2': 9
            }),
        ]
        m1 = convert_to_matrices(metrics_list, 'm1')
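        # (instance_id, summarizer_id) pairs with no score for the metric are filled with NaN.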
        np.testing.assert_array_equal(m1, [[1, 4, np.nan], [6, np.nan, 2]])
Code Example #10
File: mds_metrics.py | Project: ibeltagy/sacrerouge
def save_data(model_summaries, peer_summaries, metrics, output_dir):
    for language in model_summaries.keys():
        with JsonlWriter(f'{output_dir}/{language}.summaries.jsonl') as out_summaries:
            if language in metrics:
                out_metrics = open(f'{output_dir}/{language}.metrics.jsonl', 'w')
            else:
                out_metrics = None

            for instance_id in sorted(model_summaries[language].keys()):
                references = list(model_summaries[language][instance_id][key] for key in sorted(model_summaries[language][instance_id].keys()))
                for summarizer_id in sorted(peer_summaries[language][instance_id].keys()):
                    summary = peer_summaries[language][instance_id][summarizer_id]
                    out_summaries.write({
                        'instance_id': instance_id,
                        'summarizer_id': summarizer_id,
                        'summarizer_type': 'peer',
                        'summary': summary,
                        'references': references
                    })
                    if out_metrics is not None:
                        instance_metrics = metrics[language][instance_id][summarizer_id]
                        out_metrics.write(jsons.dumps(Metrics(instance_id, summarizer_id, 'peer', instance_metrics)) + '\n')

                for i, reference in enumerate(references):
                    summarizer_id = reference['summarizer_id']
                    out_summaries.write({
                        'instance_id': instance_id,
                        'summarizer_id': summarizer_id,
                        'summarizer_type': 'reference',
                        'summary': reference,
                        'references': references[:i] + references[i + 1:]
                    })
                    if out_metrics is not None:
                        instance_metrics = metrics[language][instance_id][summarizer_id]
                        out_metrics.write(jsons.dumps(Metrics(instance_id, summarizer_id, 'reference', instance_metrics)) + '\n')

            if out_metrics is not None:
                out_metrics.close()
Code Example #11
    def test_serialization(self):
        instance_id = 'd500'
        summarizer_id = '5'
        summarizer_type = 'peer'
        metrics_dict = {'a': 4, 'b': {'c': [1, 2]}}
        metrics = Metrics(instance_id, summarizer_id, summarizer_type,
                          metrics_dict)

        serialized = jsons.dumps(metrics)
        assert serialized == '{"instance_id": "d500", "summarizer_id": "5", "summarizer_type": "peer", "metrics": {"a": 4, "b": {"c": [1, 2]}}}'

        deserialized = jsons.loads(serialized, Metrics)
        assert metrics == deserialized
        assert isinstance(deserialized.metrics, MetricsDict)
Code Example #12
def _load_dailynews_aspects(input_file: str):
    data = json.load(open(input_file, 'r'))
    instances = []
    metrics_list = []
    documents = {}
    for i, instance in enumerate(data):
        document = instance['text'].strip()
        summary = instance['summary'].strip()
        fluent = instance['map_quality_scores']['Fluent']['scores']
        understandable = instance['map_quality_scores']['Understandable'][
            'scores']
        informative = instance['map_quality_scores']['Informative']['scores']
        compact = instance['map_quality_scores']['Compact']['scores']
        overall = instance['map_quality_scores']['Overall']['scores']
        summarizer_id = str(i)

        if document not in documents:
            documents[document] = str(len(documents))
        instance_id = documents[document]

        instances.append({
            'instance_id': instance_id,
            'summarizer_id': summarizer_id,
            'summarizer_type': 'peer',
            'summary': {
                'text': summary
            },
            'document': {
                'text': document
            }
        })
        metrics_list.append(
            Metrics(
                instance_id, summarizer_id, 'peer',
                MetricsDict({
                    'fluent': fluent,
                    'understandable': understandable,
                    'informative': informative,
                    'compact': compact,
                    'overall': overall,
                })))
    return instances, metrics_list
Code Example #13
File: setup.py | Project: danieldeutsch/sacrerouge
def convert_to_sacrerouge_instances_and_metrics(
        annotated_data: Dict, filtered_data: Dict,
        split: str) -> Tuple[List[Dict], List[Metrics]]:
    instances = []
    metrics_list = []
    for instance_dict in annotated_data.values():
        instance_id = str(instance_dict['doc_id'])

        for summarizer_id, summarizer_dict in instance_dict[
                'system_summaries'].items():
            summarizer_id = summarizer_id[:-4]  # strip .txt
            if split == 'abs' and summarizer_id == 'bart_out':
                # This should not be included in the abstractive models. See the Readme about
                # this dataset
                continue

            document_text = filtered_data[summarizer_id][instance_id]['src']
            if '<t>' in document_text:
                document_text = split_into_sentences(document_text)
                assert len(document_text) > 0
            document = {'text': document_text}

            summary_sentences = split_into_sentences(
                filtered_data[summarizer_id][instance_id]['out'])
            assert len(summary_sentences) > 0
            summary = {'text': summary_sentences}

            reference_sentences = split_into_sentences(
                filtered_data[summarizer_id][instance_id]['ref'])
            assert len(reference_sentences) > 0
            reference = {
                'summarizer_id': 'ground-truth',
                'summarizer_type': 'reference',
                'text': reference_sentences
            }

            metrics_dict = MetricsDict({
                'rouge-1': {
                    'precision': float(summarizer_dict['scores']['rouge_1_precision']),
                    'recall': float(summarizer_dict['scores']['rouge_1_recall']),
                    'f1': float(summarizer_dict['scores']['rouge_1_f_score'])
                },
                'rouge-2': {
                    'precision': float(summarizer_dict['scores']['rouge_2_precision']),
                    'recall': float(summarizer_dict['scores']['rouge_2_recall']),
                    'f1': float(summarizer_dict['scores']['rouge_2_f_score'])
                },
                'rouge-l': {
                    'precision': float(summarizer_dict['scores']['rouge_l_precision']),
                    'recall': float(summarizer_dict['scores']['rouge_l_recall']),
                    'f1': float(summarizer_dict['scores']['rouge_l_f_score'])
                },
                'js-2': float(summarizer_dict['scores']['js-2']),
                'MoverScore': float(summarizer_dict['scores']['mover_score']),
                'bertscore': {
                    'precision': float(summarizer_dict['scores']['bert_precision_score']),
                    'recall': float(summarizer_dict['scores']['bert_recall_score']),
                    'f1': float(summarizer_dict['scores']['bert_precision_score']),
                },
                'litepyramid': {
                    'recall': float(summarizer_dict['scores']['litepyramid_recall'])
                }
            })
            instances.append({
                'instance_id': instance_id,
                'summarizer_id': summarizer_id,
                'summarizer_type': 'peer',
                'references': [reference],
                'summary': summary,
                'documents': [document]
            })
            metrics_list.append(
                Metrics(instance_id, summarizer_id, 'peer', metrics_dict))
    return instances, metrics_list
Code Example #14
def load_judgments(
    file_path: str
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Metrics]]:
    summaries = []
    summaries_with_crowd = []
    metrics_list = []

    with JsonlReader(file_path) as f:
        for instance in f:
            instance_id = instance['id']
            summarizer_id = instance['model_id']
            filename = instance['filepath']
            summary = {'text': instance['decoded']}
            references = instance['references']
            expert_annotations = instance['expert_annotations']
            turker_annotations = instance['turker_annotations']
            document = instance['text']

            # The first reference appears to always be the ground truth and the rest to be crowdsourced, although
            # this is not fully confirmed; it is based only on inspecting a handful of examples.
            assert len(references) == 11
            references[0] = {
                'summarizer_id': 'ground-truth',
                'summarizer_type': 'reference',
                'text': references[0]
            }
            for i in range(1, 11):
                references[i] = {
                    'summarizer_id': f'turker-{i}',
                    'summarizer_type': 'reference',
                    'text': references[i]
                }

            summaries.append({
                'instance_id': instance_id,
                'summarizer_id': summarizer_id,
                'summarizer_type': 'peer',
                'file_path': filename,
                'document': {
                    'text': document
                },
                'summary': summary,
                'references': [references[0]]
            })
            summaries_with_crowd.append({
                'instance_id': instance_id,
                'summarizer_id': summarizer_id,
                'summarizer_type': 'peer',
                'file_path': filename,
                'document': {
                    'text': document
                },
                'summary': summary,
                'references': references
            })

            expert_metrics = MetricsDict({
                'coherence': [annotation['coherence'] for annotation in expert_annotations],
                'consistency': [annotation['consistency'] for annotation in expert_annotations],
                'fluency': [annotation['fluency'] for annotation in expert_annotations],
                'relevance': [annotation['relevance'] for annotation in expert_annotations]
            })
            turker_metrics = MetricsDict({
                'coherence': [annotation['coherence'] for annotation in turker_annotations],
                'consistency': [annotation['consistency'] for annotation in turker_annotations],
                'fluency': [annotation['fluency'] for annotation in turker_annotations],
                'relevance': [annotation['relevance'] for annotation in turker_annotations]
            })
            both = MetricsDict({
                'expert': expert_metrics,
                'turker': turker_metrics
            })
            metrics = Metrics(instance_id, summarizer_id, 'peer', both)
            metrics_list.append(metrics)

    return summaries, summaries_with_crowd, metrics_list
Code Example #15
def load_judgments(
    file_path: str
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Metrics]]:
    summaries = []
    summaries_with_crowd = []
    metrics_list = []

    with JsonlReader(file_path) as f:
        for instance in f:
            instance_id = instance['id']
            summarizer_id = instance['model_id']
            filename = instance['filepath']
            summary = {'text': instance['decoded']}
            references = instance['references']
            expert_annotations = instance['expert_annotations']
            turker_annotations = instance['turker_annotations']
            document = instance['text']

            # The first reference is always the ground-truth
            # https://github.com/Yale-LILY/SummEval/issues/8
            assert len(references) == 11
            references[0] = {
                'summarizer_id': 'ground-truth',
                'summarizer_type': 'reference',
                'text': references[0]
            }
            for i in range(1, 11):
                references[i] = {
                    'summarizer_id': f'turker-{i}',
                    'summarizer_type': 'reference',
                    'text': references[i]
                }

            summaries.append({
                'instance_id': instance_id,
                'summarizer_id': summarizer_id,
                'summarizer_type': 'peer',
                'file_path': filename,
                'document': {
                    'text': document
                },
                'summary': summary,
                'references': [references[0]]
            })
            summaries_with_crowd.append({
                'instance_id': instance_id,
                'summarizer_id': summarizer_id,
                'summarizer_type': 'peer',
                'file_path': filename,
                'document': {
                    'text': document
                },
                'summary': summary,
                'references': references
            })

            expert_metrics = MetricsDict({
                'coherence': [annotation['coherence'] for annotation in expert_annotations],
                'consistency': [annotation['consistency'] for annotation in expert_annotations],
                'fluency': [annotation['fluency'] for annotation in expert_annotations],
                'relevance': [annotation['relevance'] for annotation in expert_annotations]
            })
            turker_metrics = MetricsDict({
                'coherence': [annotation['coherence'] for annotation in turker_annotations],
                'consistency': [annotation['consistency'] for annotation in turker_annotations],
                'fluency': [annotation['fluency'] for annotation in turker_annotations],
                'relevance': [annotation['relevance'] for annotation in turker_annotations]
            })
            both = MetricsDict({
                'expert': expert_metrics,
                'turker': turker_metrics
            })
            metrics = Metrics(instance_id, summarizer_id, 'peer', both)
            metrics_list.append(metrics)

    return summaries, summaries_with_crowd, metrics_list
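Taken together, the loaders above all converge on the same output convention: one Metrics object per (instance_id, summarizer_id) pair, written line by line to a *.metrics.jsonl file with JsonlWriter and read back either with JsonlReader (Code Example #6) or with jsons (Code Example #11). A minimal round-trip sketch of that convention; the import paths, file name, and score values are assumptions for illustration only:

from sacrerouge.data import Metrics, MetricsDict  # import paths assumed
from sacrerouge.io import JsonlReader, JsonlWriter

# One Metrics object per (instance_id, summarizer_id) pair, as in the examples above.
metrics_list = [
    Metrics('d100', '1', 'peer', MetricsDict({'rouge-1': {'recall': 0.41}})),
    Metrics('d100', '2', 'peer', MetricsDict({'rouge-1': {'recall': 0.37}})),
]

# Write one JSON object per line to a hypothetical *.metrics.jsonl file.
with JsonlWriter('example.metrics.jsonl') as out:
    for metrics in metrics_list:
        out.write(metrics)

# Read the lines back, deserializing each one into a Metrics instance.
reloaded = JsonlReader('example.metrics.jsonl', Metrics).read()
assert reloaded == metrics_list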