Ejemplo n.º 1
0
    def read(self, pyramid_jsonl: str,
             annotation_jsonl: str) -> List[EvalInstance]:
        """Load Pyramid annotations and pair each one with its Pyramid.

        Args:
            pyramid_jsonl: Path handed to ``JsonlReader`` to load ``Pyramid``
                objects, which are indexed by their ``instance_id``.
            annotation_jsonl: Path handed to ``JsonlReader`` to load
                ``PyramidAnnotation`` objects.

        Returns:
            One ``EvalInstance`` per loaded annotation, in the order the
            reader yields them. When ``self.include_reference_annotations``
            is truthy, these are followed by instances generated from the
            pyramids of the annotated instance ids (visited in first-seen
            order), one per entry in ``pyramid.summarizer_ids``, for every
            pyramid that has more than one such entry.

        Raises:
            KeyError: If an annotation refers to an ``instance_id`` for which
                no pyramid was loaded.
        """
        logger.info(f'Loading Pyramids from {pyramid_jsonl}')
        pyramids = {}
        with JsonlReader(pyramid_jsonl, Pyramid) as f:
            for pyramid in f:
                pyramids[pyramid.instance_id] = pyramid
        logger.info(f'Loaded {len(pyramids)} pyramids')

        logger.info(f'Loading Pyramid annotations from {annotation_jsonl}')
        instances = []
        # Track the annotated instance ids in first-seen order. Iterating a
        # plain set here would make the order of the generated reference
        # instances depend on hash randomization and vary between runs.
        instance_ids = []
        seen_instance_ids = set()
        with JsonlReader(annotation_jsonl, PyramidAnnotation) as f:
            for annotation in f:
                fields = Fields({
                    'annotation':
                    PyramidAnnotationField(annotation),
                    'pyramid':
                    PyramidField(pyramids[annotation.instance_id])
                })

                instance = EvalInstance(annotation.instance_id,
                                        annotation.summarizer_id,
                                        annotation.summarizer_type, fields)
                instances.append(instance)

                if annotation.instance_id not in seen_instance_ids:
                    seen_instance_ids.add(annotation.instance_id)
                    instance_ids.append(annotation.instance_id)

            logger.info(f'Loaded {len(instances)} Pyramid annotations')

        if self.include_reference_annotations:
            logger.info(
                'Generating Pyramid annotations for the reference summaries')
            reference_instances = []
            for instance_id in instance_ids:
                pyramid = pyramids[instance_id]

                # We can only do this if there are > 1 summaries used to
                # construct the pyramid
                if len(pyramid.summarizer_ids) > 1:
                    for i in range(len(pyramid.summarizer_ids)):
                        annotation = pyramid.get_annotation(i)
                        # Pair the i-th reference's annotation with the
                        # pyramid returned by `remove_summary(i)`
                        reduced_pyramid = pyramid.remove_summary(i)
                        fields = Fields({
                            'annotation':
                            PyramidAnnotationField(annotation),
                            'pyramid':
                            PyramidField(reduced_pyramid)
                        })
                        instance = EvalInstance(annotation.instance_id,
                                                annotation.summarizer_id,
                                                annotation.summarizer_type,
                                                fields)
                        reference_instances.append(instance)
            logger.info(
                f'Generated {len(reference_instances)} reference summary annotations'
            )
            instances.extend(reference_instances)

        logger.info(f'Loaded a total of {len(instances)} instances')
        return instances
    def read(self) -> List[EvalInstance]:
        """Create evaluation instances for every peer and reference summary.

        Pyramids are read from ``self.pyramid_jsonl`` and annotations from
        ``self.annotation_jsonl``. One instance is created per annotation
        (the peers), followed by one instance per entry of each pyramid's
        ``summaries`` (the references).

        Returns:
            The list of ``EvalInstance`` objects, peers before references.

        Raises:
            KeyError: If an annotation's ``instance_id`` matches no pyramid.
        """
        pyramids = JsonlReader(self.pyramid_jsonl, Pyramid).read()
        annotations = JsonlReader(self.annotation_jsonl,
                                  PyramidAnnotation).read()

        # Index the pyramids so each peer annotation can find its pyramid
        pyramid_lookup = {}
        for pyramid in pyramids:
            pyramid_lookup[pyramid.instance_id] = pyramid

        eval_instances = []

        # Peers: each annotation is paired with the pyramid of its instance
        for peer in annotations:
            peer_summary = PyramidAnnotationField(peer)
            peer_fields = Fields(
                {'pyramid': PyramidField(pyramid_lookup[peer.instance_id])})
            eval_instances.append(
                EvalInstance(peer.instance_id, peer.summarizer_id,
                             peer.summarizer_type, peer_summary, peer_fields))

        # References: the i-th annotation of a pyramid is paired with the
        # pyramid returned by `remove_summary(i)`
        # NOTE(review): pyramids with a single summary are not skipped here
        for pyramid in pyramids:
            for index, _ in enumerate(pyramid.summaries):
                reference = pyramid.get_annotation(index)
                reference_summary = PyramidAnnotationField(reference)
                reference_fields = Fields(
                    {'pyramid': PyramidField(pyramid.remove_summary(index))})
                eval_instances.append(
                    EvalInstance(reference.instance_id,
                                 reference.summarizer_id,
                                 reference.summarizer_type, reference_summary,
                                 reference_fields))

        return eval_instances
Ejemplo n.º 3
0
    def read(self, documents_jsonl: str,
             summaries_jsonl) -> List[EvalInstance]:
        """Load summaries and attach the documents of the matching instance.

        Args:
            documents_jsonl: Path handed to ``JsonlReader``. Each record has
                an ``instance_id`` and either a single ``document`` or a list
                of ``documents``, each with a ``text`` entry.
            summaries_jsonl: Path handed to ``JsonlReader``. Each record has
                ``instance_id``, ``summarizer_id``, ``summarizer_type`` and a
                ``summary`` with a ``text`` entry.

        Returns:
            One ``EvalInstance`` per summary record whose fields hold the
            ``summary`` and the ``documents`` loaded for its instance.

        Raises:
            KeyError: If a summary's ``instance_id`` has no loaded documents.
        """
        logger.info(f'Loading documents from {documents_jsonl}')
        documents_dict = {}
        with JsonlReader(documents_jsonl) as reader:
            for record in reader:
                key = record['instance_id']
                # Single-document records are wrapped into a one-item list
                # so both layouts go through `flatten_documents`
                texts = ([record['document']['text']]
                         if 'document' in record else
                         [entry['text'] for entry in record['documents']])
                documents_dict[key] = DocumentsField(flatten_documents(texts))
        logger.info(f'Loaded {len(documents_dict)} document sets')

        logger.info(f'Loading summaries from {summaries_jsonl}')
        instances = []
        with JsonlReader(summaries_jsonl) as reader:
            for record in reader:
                fields = Fields({
                    'summary': SummaryField(record['summary']['text']),
                    'documents': documents_dict[record['instance_id']],
                })
                instances.append(
                    EvalInstance(record['instance_id'],
                                 record['summarizer_id'],
                                 record['summarizer_type'], fields))
        logger.info(f'Loaded {len(instances)} instances')
        return instances
Ejemplo n.º 4
0
    def get_jackknifing_fields_list(self, fields: Fields) -> List[Fields]:
        """Build one copy of ``fields`` per summary in the pyramid.

        In the i-th copy, the ``'pyramid'`` entry is replaced by a
        ``PyramidField`` wrapping ``pyramid.remove_summary(i)``.

        Args:
            fields: Fields whose ``'pyramid'`` entry exposes a ``pyramid``.

        Returns:
            The list of modified copies, or ``None`` when the pyramid has
            exactly one summary and no jackknifing can be done.
        """
        pyramid = fields['pyramid'].pyramid
        num_summaries = len(pyramid.summaries)
        if num_summaries == 1:
            # A single summary leaves nothing to hold out
            return None

        jackknifed = []
        for held_out in range(num_summaries):
            # Copy the incoming fields, then swap in the reduced pyramid
            reduced_fields = Fields(fields)
            reduced_fields['pyramid'] = PyramidField(
                pyramid.remove_summary(held_out))
            jackknifed.append(reduced_fields)
        return jackknifed
Ejemplo n.º 5
0
    def read(self, input_jsonl: str) -> List[EvalInstance]:
        """Load summaries together with their answered questions.

        Each record provides ``instance_id``, ``summarizer_id``,
        ``summarizer_type``, a ``summary`` with ``text``, and ``references``.
        Every reference holds ``questions``, and every question holds
        ``predictions``; one ``AnsweredQuestion`` is created per prediction.

        Args:
            input_jsonl: Path handed to ``JsonlReader``.

        Returns:
            One ``EvalInstance`` per record that has at least one reference
            with at least one answered question. References without any
            answered question are dropped, as are records left with none.
        """
        eval_instances = []
        with JsonlReader(input_jsonl) as reader:
            for record in reader:
                instance_id = record['instance_id']
                summarizer_id = record['summarizer_id']
                summarizer_type = record['summarizer_type']
                summary = SummaryField(record['summary']['text'])

                per_reference_questions = []
                for reference in record['references']:
                    answered = []
                    for question_info in reference['questions']:
                        prompt_id = question_info['prompt_id']
                        question_id = question_info['question_id']
                        # `group_id` is optional and defaults to `None`
                        group_id = question_info.get('group_id')
                        question = question_info['question']
                        answer = question_info['answer']
                        for prediction_info in question_info['predictions']:
                            prediction_id = prediction_info['prediction_id']
                            prediction = prediction_info['answer']
                            probability = prediction_info['probability']
                            null_probability = prediction_info[
                                'null_probability']
                            # Dummy value of `False` when no label is present
                            is_correct = prediction_info.get(
                                'is_correct', False)

                            answered.append(
                                AnsweredQuestion(prompt_id, question_id,
                                                 prediction_id, question,
                                                 answer, prediction,
                                                 probability, null_probability,
                                                 group_id, is_correct))

                    if answered:
                        per_reference_questions.append(answered)

                if not per_reference_questions:
                    # Nothing was answered for this record, so skip it
                    continue

                fields = Fields({
                    'summary':
                    summary,
                    'answered_questions':
                    AnsweredQuestionsField(per_reference_questions)
                })
                eval_instances.append(
                    EvalInstance(instance_id, summarizer_id, summarizer_type,
                                 fields))
        return eval_instances
Ejemplo n.º 6
0
    def get_jackknifing_fields_list(self, fields: Fields) -> List[Fields]:
        """Build one copy of ``fields`` per summarizer id of the pyramid.

        In the i-th copy, the ``'pyramid'`` entry is replaced by a
        ``PyramidField`` wrapping ``pyramid.remove_summary(i)``.

        Args:
            fields: Fields whose ``'pyramid'`` entry exposes a ``pyramid``.

        Returns:
            The list of modified copies, or ``None`` when the pyramid has
            exactly one summarizer id and no jackknifing can be done.
        """
        pyramid = fields['pyramid'].pyramid
        num_summarizers = len(pyramid.summarizer_ids)
        if num_summarizers == 1:
            # Only one summarizer contributed, so nothing can be held out
            return None

        jackknifed = []
        for held_out in range(num_summarizers):
            # Start from a copy of the given fields and swap in the pyramid
            # returned by `remove_summary` for this index
            reduced_fields = Fields(fields)
            reduced_fields['pyramid'] = PyramidField(
                pyramid.remove_summary(held_out))
            jackknifed.append(reduced_fields)
        return jackknifed
Ejemplo n.º 7
0
    def read(self) -> List[EvalInstance]:
        """Load one evaluation instance per record of ``self.input_jsonl``.

        Each record provides ``instance_id``, ``summarizer_id``,
        ``summarizer_type`` and a ``summary`` with a ``text`` entry. The
        instances carry the summary and an empty ``Fields``.

        Returns:
            The instances in the order the reader yields the records.
        """
        instances = []
        with JsonlReader(self.input_jsonl) as reader:
            for record in reader:
                summary_field = SummaryField(record['summary']['text'])
                # No extra fields accompany the summary here
                empty_fields = Fields({})
                instances.append(
                    EvalInstance(record['instance_id'],
                                 record['summarizer_id'],
                                 record['summarizer_type'], summary_field,
                                 empty_fields))
            return instances
Ejemplo n.º 8
0
    def get_jackknifing_fields_list(self, fields: Fields) -> List[Fields]:
        """Build copies of ``fields`` that each leave out one question list.

        In the i-th copy, the ``'answered_questions'`` entry is replaced by
        an ``AnsweredQuestionsField`` holding every item of
        ``answered_questions_list`` except the one at index i.

        Args:
            fields: Fields whose ``'answered_questions'`` entry exposes an
                ``answered_questions_list``.

        Returns:
            The list of modified copies, or ``None`` when the list has
            exactly one item and no jackknifing can be done.
        """
        question_lists = fields['answered_questions'].answered_questions_list
        total = len(question_lists)
        if total == 1:
            # `None` signals that jackknifing cannot be done
            return None

        jackknifed = []
        for held_out in range(total):
            reduced_fields = Fields(fields)
            # Everything before and after the held-out index is kept
            kept_before = question_lists[:held_out]
            kept_after = question_lists[held_out + 1:]
            reduced_fields['answered_questions'] = AnsweredQuestionsField(
                kept_before + kept_after)
            jackknifed.append(reduced_fields)
        return jackknifed
Ejemplo n.º 9
0
    def get_jackknifing_fields_list(self, fields: Fields) -> List[Fields]:
        """Build copies of ``fields`` that each leave out one reference.

        In the i-th copy, the ``'references'`` entry is replaced by a
        ``ReferencesField`` holding every reference except the one at
        index i.

        Args:
            fields: Fields whose ``'references'`` entry exposes
                ``references``.

        Returns:
            The list of modified copies, or ``None`` when there is exactly
            one reference and no jackknifing can be done.
        """
        references = fields['references'].references
        total = len(references)
        if total == 1:
            # `None` signals that jackknifing cannot be done
            return None

        jackknifed = []
        for held_out in range(total):
            # Start from a copy of the given fields, then drop one reference
            reduced_fields = Fields(fields)
            kept_before = references[:held_out]
            kept_after = references[held_out + 1:]
            reduced_fields['references'] = ReferencesField(kept_before +
                                                           kept_after)
            jackknifed.append(reduced_fields)
        return jackknifed
    def read(self) -> List[EvalInstance]:
        """Load summaries with their input documents from ``self.input_jsonl``.

        Each record provides ``instance_id``, ``summarizer_id``,
        ``summarizer_type``, a ``summary`` with ``text``, and either a single
        ``document`` or a list of ``documents``, each with a ``text`` entry.

        Returns:
            One ``EvalInstance`` per record, carrying the summary and a
            ``Fields`` whose ``'documents'`` entry holds the document texts.
        """
        instances = []
        with JsonlReader(self.input_jsonl) as reader:
            for record in reader:
                summary_field = SummaryField(record['summary']['text'])

                # A lone 'document' is wrapped into a one-item list so both
                # record layouts produce a list of texts
                if 'document' not in record:
                    texts = [entry['text'] for entry in record['documents']]
                else:
                    texts = [record['document']['text']]
                fields = Fields({'documents': DocumentsField(texts)})

                instances.append(
                    EvalInstance(record['instance_id'],
                                 record['summarizer_id'],
                                 record['summarizer_type'], summary_field,
                                 fields))
            return instances
Ejemplo n.º 11
0
    def read(self, input_jsonl: str) -> List[EvalInstance]:
        """Load one evaluation instance per record of ``input_jsonl``.

        Each record provides ``instance_id``, ``summarizer_id``,
        ``summarizer_type`` and a ``summary`` with a ``text`` entry.

        Args:
            input_jsonl: Path handed to ``JsonlReader``.

        Returns:
            The instances, each with a ``Fields`` holding only ``'summary'``.
        """
        logger.info(f'Loading evaluation instances from {input_jsonl}')
        instances = []
        with JsonlReader(input_jsonl) as reader:
            for record in reader:
                # The summary is the only field attached to the instance
                summary_fields = Fields(
                    {'summary': SummaryField(record['summary']['text'])})
                instances.append(
                    EvalInstance(record['instance_id'],
                                 record['summarizer_id'],
                                 record['summarizer_type'], summary_fields))

            logger.info(f'Loaded {len(instances)} instances')
            return instances
Ejemplo n.º 12
0
    def read(self) -> List[EvalInstance]:
        """Load summaries with their references from ``self.input_jsonl``.

        Each record provides ``instance_id``, ``summarizer_id``,
        ``summarizer_type``, a ``summary`` with ``text``, and either a single
        ``reference`` or a list of ``references``, each with a ``text`` entry.

        Returns:
            One ``EvalInstance`` per record, carrying the summary and a
            ``Fields`` whose ``'references'`` entry holds the reference texts.
        """
        instances = []
        with JsonlReader(self.input_jsonl) as reader:
            for record in reader:
                summary_field = SummaryField(record['summary']['text'])

                # A lone 'reference' is wrapped into a one-item list so both
                # record layouts produce a list of texts
                if 'reference' not in record:
                    texts = [entry['text'] for entry in record['references']]
                else:
                    texts = [record['reference']['text']]
                fields = Fields({'references': ReferencesField(texts)})

                instances.append(
                    EvalInstance(record['instance_id'],
                                 record['summarizer_id'],
                                 record['summarizer_type'], summary_field,
                                 fields))
            return instances