def load_peer_pyramids(
    eval_tar: str, pyramids: Dict[str, Dict[str, Pyramid]]
) -> Dict[str, Dict[str, Dict[str, PyramidAnnotation]]]:
    """Load the peer pyramid annotations stored in ``eval_tar``.

    Every regular file below ``UpdateSumm09_eval/manual/peers/`` is parsed.
    The file name is split on ``.``: the first field has the form
    ``<instance>-<group>`` and the last field is the summarizer id.

    Args:
        eval_tar: Path to the tar archive to read.
        pyramids: Pyramids indexed by lowercased instance id, then by group.

    Returns:
        A nested ``defaultdict`` mapping instance id -> summarizer id ->
        group -> annotation. Files whose parsed annotation is falsy are
        reported on stdout and left out.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic (this
            directory is expected to contain peers only).
        KeyError: If ``pyramids`` has no entry for a file's instance/group.
    """
    peers_prefix = 'UpdateSumm09_eval/manual/peers/'
    loaded = defaultdict(lambda: defaultdict(dict))

    with tarfile.open(eval_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not (entry.isfile() and entry.name.startswith(peers_prefix)):
                continue

            fields = entry.name.split('/')[-1].split('.')
            instance_and_group = fields[0].split('-')
            instance_id = instance_and_group[0].lower()
            group = instance_and_group[1]
            summarizer_id = fields[-1]
            # This directory only has peers
            assert not summarizer_id.isalpha()

            # Look the pyramid up before touching the file contents so a
            # missing pyramid fails fast.
            pyramid = pyramids[instance_id][group]
            contents = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}-{group}', summarizer_id, 'peer', contents,
                pyramid)
            if not annotation:
                print(
                    f'Annotation for {instance_id}-{group}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id][group] = annotation

    return loaded
Ejemplo n.º 2
0
def load_peer_pyramids(
        eval_tar: str,
        pyramids: Dict[str,
                       Pyramid]) -> Dict[str, Dict[str, PyramidAnnotation]]:
    """Load the pyramid annotations found under ``pans/`` in ``eval_tar``.

    Each file name is split on ``.``: field 1 (lowercased) is the instance
    id and field 5 is the summarizer id. A purely alphabetic summarizer id
    is treated as a reference summary, anything else as a peer.

    Args:
        eval_tar: Path to the tar archive to read.
        pyramids: Pyramids indexed by lowercased instance id.

    Returns:
        A ``defaultdict`` mapping instance id -> summarizer id -> annotation.
        Files whose parsed annotation is falsy are reported on stdout and
        left out.

    Raises:
        IndexError: If a file name has fewer than six ``.``-separated fields.
        KeyError: If ``pyramids`` has no entry for a file's instance id.
    """
    loaded = defaultdict(dict)

    with tarfile.open(eval_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not (entry.isfile() and entry.name.startswith('pans/')):
                continue

            fields = entry.name.split('/')[-1].split('.')
            instance_id = fields[1].lower()
            summarizer_id = fields[5]
            summarizer_type = (
                'reference' if summarizer_id.isalpha() else 'peer')

            pyramid = pyramids[instance_id]
            contents = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                instance_id, summarizer_id, summarizer_type, contents,
                pyramid)
            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id] = annotation

    return loaded
Ejemplo n.º 3
0
def load_main_annotations(
        main_pyramid_tar: str,
        pyramids: Dict[str,
                       Pyramid]) -> Dict[str, Dict[str, PyramidAnnotation]]:
    """Load the peer annotations of the main pyramid evaluation.

    Every regular file whose archive path starts with
    ``mainPyramidEval/allpans`` is parsed. The file name is split on ``.``:
    field 0 (lowercased) is the instance id and field 4 is the summarizer id.

    Args:
        main_pyramid_tar: Path to the tar archive to read.
        pyramids: Pyramids indexed by lowercased instance id.

    Returns:
        A ``defaultdict`` mapping instance id -> summarizer id -> annotation.
        Files whose parsed annotation is falsy are reported on stdout and
        left out.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic (this
            directory is expected to contain peers only).
        KeyError: If ``pyramids`` has no entry for a file's instance id.
    """
    allpans_prefix = 'mainPyramidEval/allpans'
    loaded = defaultdict(dict)

    with tarfile.open(main_pyramid_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not (entry.isfile()
                    and entry.name.startswith(allpans_prefix)):
                continue

            fields = entry.name.split('/')[-1].split('.')
            instance_id = fields[0].lower()
            summarizer_id = fields[4]
            # This directory only has peers
            assert not summarizer_id.isalpha()

            pyramid = pyramids[instance_id]
            contents = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                instance_id, summarizer_id, 'peer', contents, pyramid)
            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id] = annotation

    return loaded
def load_peer_pyramids(
    eval_tar: str, pyramids: Dict[str, Pyramid]
) -> 'Tuple[Dict[str, Dict[str, PyramidAnnotation]], Dict[str, List[PyramidAnnotation]]]':
    """Load the peer annotations found under ``allpans/`` in ``eval_tar``.

    Each file name is split on ``.``: field 0 is an annotation-set number,
    field 1 (lowercased) is the instance id and field 5 is the summarizer id.

    Instance ``d0631`` was annotated twice. Only the annotations from set
    number ``114`` are stored in the main mapping for that instance, while
    every ``d0631`` annotation is additionally collected in ``multiples``.

    Args:
        eval_tar: Path to the tar archive to read.
        pyramids: Pyramids indexed by lowercased instance id.

    Returns:
        A 2-tuple ``(annotations, multiples)``:

        * ``annotations`` - ``defaultdict`` mapping instance id ->
          summarizer id -> annotation.
        * ``multiples`` - ``defaultdict`` mapping summarizer id -> list of
          all ``d0631`` annotations for that summarizer.

        Files whose parsed annotation is falsy are reported on stdout and
        left out of both mappings.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic (this
            directory is expected to contain peers only).
        KeyError: If ``pyramids`` has no entry for a file's instance id.
    """
    # NOTE: the return annotation is quoted so that it is never evaluated at
    # definition time and therefore does not require `Tuple`/`List` to be
    # imported by this module.
    annotations = defaultdict(dict)
    multiples = defaultdict(list)
    with tarfile.open(eval_tar, 'r') as tar:
        for member in tar.getmembers():
            if not (member.isfile() and member.name.startswith('allpans/')):
                continue

            filename = member.name.split('/')[-1]
            parts = filename.split('.')
            number = parts[0]
            instance_id = parts[1].lower()
            summarizer_id = parts[5]
            # This directory only has peers
            assert not summarizer_id.isalpha()
            summarizer_type = 'peer'

            pyramid = pyramids[instance_id]
            xml = tar.extractfile(member).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}', summarizer_id, summarizer_type, xml,
                pyramid)
            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue

            if instance_id == 'd0631':
                # This instance was annotated twice. We just take the first
                # set for the pyramids files, but save both for a separate
                # file
                if number == '114':
                    annotations[instance_id][summarizer_id] = annotation
                multiples[summarizer_id].append(annotation)
            else:
                annotations[instance_id][summarizer_id] = annotation

    return annotations, multiples
Ejemplo n.º 5
0
 def _get_scu_intersection(self, annotation: PyramidAnnotation, pyramid: Pyramid, index: int) -> Set[int]:
     annotation_scus = annotation.get_scu_id_set()
     reference_scus = pyramid.get_scu_id_set(index)
     return annotation_scus & reference_scus