def load_peer_pyramids(
    eval_tar: str, pyramids: Dict[str, Dict[str, Pyramid]]
) -> Dict[str, Dict[str, Dict[str, PyramidAnnotation]]]:
    """Load the peer pyramid annotations stored in ``eval_tar``.

    Only regular files whose archive path starts with
    ``UpdateSumm09_eval/manual/peers/`` are read. The file name is split on
    ``.``: the first field has the form ``<instance>-<group>`` and the last
    field is the summarizer id.

    Args:
        eval_tar: Path of the tar archive to open.
        pyramids: Pyramids indexed as ``pyramids[instance_id][group]``, where
            ``instance_id`` is the lower-cased instance part of the file name.

    Returns:
        A nested ``defaultdict`` indexed as
        ``result[instance_id][summarizer_id][group]``. Files for which
        ``PyramidAnnotation.from_xml`` returns a falsy value are reported on
        stdout and left out.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic.
        KeyError: If no pyramid exists for a file's instance id and group.
    """
    peers_prefix = 'UpdateSumm09_eval/manual/peers/'
    loaded = defaultdict(lambda: defaultdict(dict))

    with tarfile.open(eval_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not entry.isfile() or not entry.name.startswith(peers_prefix):
                continue

            fields = entry.name.rsplit('/', 1)[-1].split('.')
            id_and_group = fields[0].split('-')
            instance_id = id_and_group[0].lower()
            group = id_and_group[1]
            summarizer_id = fields[-1]

            # This directory only has peers
            assert not summarizer_id.isalpha()

            pyramid = pyramids[instance_id][group]
            xml = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}-{group}', summarizer_id, 'peer', xml, pyramid)

            if not annotation:
                print(
                    f'Annotation for {instance_id}-{group}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id][group] = annotation

    return loaded
def load_peer_pyramids(
    eval_tar: str,
    pyramids: Dict[str, Pyramid]) -> Dict[str, Dict[str, PyramidAnnotation]]:
    """Load the pyramid annotations found under ``pans/`` in ``eval_tar``.

    Each regular file whose archive path starts with ``pans/`` is parsed. The
    file name is split on ``.``: field 1 (lower-cased) is the instance id and
    field 5 is the summarizer id. A purely alphabetic summarizer id is
    labelled ``'reference'``; any other id is labelled ``'peer'``.

    Args:
        eval_tar: Path of the tar archive to open.
        pyramids: Pyramids indexed by lower-cased instance id.

    Returns:
        A ``defaultdict`` indexed as ``result[instance_id][summarizer_id]``.
        Files for which ``PyramidAnnotation.from_xml`` returns a falsy value
        are reported on stdout and left out.

    Raises:
        KeyError: If no pyramid exists for a file's instance id.
        IndexError: If a file name has fewer than six ``.``-separated fields.
    """
    loaded = defaultdict(dict)

    with tarfile.open(eval_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not entry.isfile() or not entry.name.startswith('pans/'):
                continue

            fields = entry.name.rsplit('/', 1)[-1].split('.')
            instance_id = fields[1].lower()
            summarizer_id = fields[5]
            summarizer_type = 'reference' if summarizer_id.isalpha() else 'peer'

            pyramid = pyramids[instance_id]
            xml = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}', summarizer_id, summarizer_type, xml, pyramid)

            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id] = annotation

    return loaded
def load_main_annotations(
    main_pyramid_tar: str,
    pyramids: Dict[str, Pyramid]) -> Dict[str, Dict[str, PyramidAnnotation]]:
    """Load the peer annotations under ``mainPyramidEval/allpans``.

    Each regular file whose archive path starts with
    ``mainPyramidEval/allpans`` is parsed. The file name is split on ``.``:
    field 0 (lower-cased) is the instance id and field 4 is the summarizer id.
    Every annotation is created with summarizer type ``'peer'``.

    Args:
        main_pyramid_tar: Path of the tar archive to open.
        pyramids: Pyramids indexed by lower-cased instance id.

    Returns:
        A ``defaultdict`` indexed as ``result[instance_id][summarizer_id]``.
        Files for which ``PyramidAnnotation.from_xml`` returns a falsy value
        are reported on stdout and left out.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic.
        KeyError: If no pyramid exists for a file's instance id.
        IndexError: If a file name has fewer than five ``.``-separated fields.
    """
    allpans_prefix = 'mainPyramidEval/allpans'
    loaded = defaultdict(dict)

    with tarfile.open(main_pyramid_tar, 'r') as archive:
        for entry in archive.getmembers():
            if not entry.isfile() or not entry.name.startswith(allpans_prefix):
                continue

            fields = entry.name.rsplit('/', 1)[-1].split('.')
            instance_id = fields[0].lower()
            summarizer_id = fields[4]

            # This directory only has peers
            assert not summarizer_id.isalpha()

            pyramid = pyramids[instance_id]
            xml = archive.extractfile(entry).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}', summarizer_id, 'peer', xml, pyramid)

            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue
            loaded[instance_id][summarizer_id] = annotation

    return loaded
def load_peer_pyramids(
    eval_tar: str, pyramids: Dict[str, Pyramid]
) -> 'Tuple[Dict[str, Dict[str, PyramidAnnotation]], Dict[str, List[PyramidAnnotation]]]':
    """Load the peer annotations found under ``allpans/`` in ``eval_tar``.

    Each regular file whose archive path starts with ``allpans/`` is parsed.
    The file name is split on ``.``: field 0 is a number, field 1
    (lower-cased) is the instance id and field 5 is the summarizer id. Every
    annotation is created with summarizer type ``'peer'``.

    Instance ``d0631`` was annotated twice. Only the annotations from files
    numbered ``114`` are stored in the main mapping, while every ``d0631``
    annotation is also collected separately.

    Args:
        eval_tar: Path of the tar archive to open.
        pyramids: Pyramids indexed by lower-cased instance id.

    Returns:
        A 2-tuple ``(annotations, multiples)``:

        * ``annotations`` is a ``defaultdict`` indexed as
          ``annotations[instance_id][summarizer_id]``.
        * ``multiples`` is a ``defaultdict`` mapping a summarizer id to the
          list of all its ``d0631`` annotations, in archive order.

        Files for which ``PyramidAnnotation.from_xml`` returns a falsy value
        are reported on stdout and left out of both.

    Raises:
        AssertionError: If a summarizer id is purely alphabetic.
        KeyError: If no pyramid exists for a file's instance id.
        IndexError: If a file name has fewer than six ``.``-separated fields.
    """
    # NOTE: the return annotation is quoted so it is not evaluated when the
    # function is defined; ``Tuple``/``List`` may not be imported by this
    # module. The old annotations described a nested-dict input and a single
    # nested-dict result, which did not match what the body does.
    annotations = defaultdict(dict)
    multiples = defaultdict(list)

    with tarfile.open(eval_tar, 'r') as tar:
        for member in tar.getmembers():
            if not (member.isfile() and member.name.startswith('allpans/')):
                continue

            filename = member.name.split('/')[-1]
            parts = filename.split('.')
            number = parts[0]
            instance_id = parts[1].lower()
            summarizer_id = parts[5]

            # This directory only has peers
            assert not summarizer_id.isalpha()
            summarizer_type = 'peer'

            pyramid = pyramids[instance_id]
            xml = tar.extractfile(member).read().decode()
            annotation = PyramidAnnotation.from_xml(
                f'{instance_id}', summarizer_id, summarizer_type, xml, pyramid)

            if not annotation:
                print(
                    f'Annotation for {instance_id}, {summarizer_id} is `None`. Skipping'
                )
                continue

            if instance_id == 'd0631':
                # This instance was annotated twice. We just take the first
                # set (file number 114) for the pyramids files, but save both
                # for a separate file.
                if number == '114':
                    annotations[instance_id][summarizer_id] = annotation
                multiples[summarizer_id].append(annotation)
            else:
                annotations[instance_id][summarizer_id] = annotation

    return annotations, multiples
def _get_scu_intersection(self, annotation: PyramidAnnotation,
                          pyramid: Pyramid, index: int) -> Set[int]:
    """Return the SCU ids present in both ``annotation`` and ``pyramid``.

    Args:
        annotation: Object whose ``get_scu_id_set()`` gives the annotation's
            SCU ids.
        pyramid: Object whose ``get_scu_id_set(index)`` gives the SCU ids to
            compare against.
        index: Forwarded unchanged to ``pyramid.get_scu_id_set``; presumably
            selects one reference within the pyramid (confirm against
            ``Pyramid``).

    Returns:
        The intersection (``&``) of the two id sets.
    """
    # The annotation's ids are fetched first, then the pyramid's.
    return annotation.get_scu_id_set() & pyramid.get_scu_id_set(index)