def validate_responses(args):
    """
    Validate the responses of a run and write out the valid ones.

    Builds the logger, the document mappings and the per-modality boundaries
    from the files named in ``args``, loads the query set when ``args.queries``
    is set, constructs the response set and writes the valid responses to
    ``args.output``. The warning/error counts reported by the logger are logged
    and printed, and the process exits with ERROR_EXIT_CODE when at least one
    error was counted, ALLOK_EXIT_CODE otherwise.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    logger.record_event('DEFAULT_INFO', 'validation started')

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    document_mappings = DocumentMappings(logger, args.parent_children, encodings, core_documents)

    document_boundaries = {
        'text': TextBoundaries(logger, args.sentence_boundaries),
        'image': ImageBoundaries(logger, args.image_boundaries),
    }
    # Video boundaries are loaded before keyframe boundaries, yet the mapping
    # lists 'keyframe' ahead of 'video'; keep both orders as they were.
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    document_boundaries['keyframe'] = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries['video'] = video_boundaries

    queries = None
    if args.queries:
        queries = TA3QuerySet(logger, args.queries)

    response_set = ResponseSet(logger,
                               document_mappings,
                               document_boundaries,
                               args.input,
                               args.runid,
                               args.task,
                               queries=queries)
    response_set.write_valid_responses(args.output)

    warning_count, error_count = logger.get_stats()
    summary = 'validation finished (warnings:{}, errors:{})'.format(warning_count, error_count)
    logger.record_event('DEFAULT_INFO', summary)
    print(summary)

    exit(ERROR_EXIT_CODE if error_count > 0 else ALLOK_EXIT_CODE)
def main(args):
    """
    The main program for generating AIF.

    Checks the paths in ``args``, loads the core documents, encodings,
    document mappings, per-modality boundaries, type mappings and slot
    mappings from the files named in ``args``, builds the annotations and the
    AIF generator, writes the generator output to ``args.output`` and exits
    with ALLOK_EXIT_CODE.
    """
    check_paths(args)
    logger = Logger(args.log, args.log_specifications_filename, sys.argv)

    core_documents = CoreDocuments(logger, args.core_documents_filename)
    encodings = Encodings(logger, args.encodings_filename)
    document_mappings = DocumentMappings(logger,
                                         args.parent_children_filename,
                                         encodings,
                                         core_documents)

    # Boundary loaders paired with their input files, in load order.
    boundary_sources = (
        (TextBoundaries, args.sentence_boundaries_filename),
        (ImageBoundaries, args.image_boundaries_filename),
        (VideoBoundaries, args.video_boundaries_filename),
        (KeyFrameBoundaries, args.keyframe_boundaries_filename),
    )
    text_boundaries, image_boundaries, video_boundaries, keyframe_boundaries = (
        loader(logger, filename) for loader, filename in boundary_sources
    )

    # Map each 'full_type_ov' value to its 'full_type' value.
    type_mappings = Container(logger)
    for row in FileHandler(logger, args.type_mappings_filename):
        type_mappings.add(key=row.get('full_type_ov'), value=row.get('full_type'))

    slot_mappings = SlotMappings(logger, args.slot_mappings_filename)

    annotations = Annotations(logger,
                              slot_mappings,
                              document_mappings,
                              text_boundaries,
                              image_boundaries,
                              video_boundaries,
                              keyframe_boundaries,
                              type_mappings,
                              args.annotations,
                              load_video_time_offsets_flag=args.notime)

    aif_generator = AIFGenerator(logger, annotations, args.nochannel, args.reference_kb_id)
    aif_generator.write_output(args.output)
    exit(ALLOK_EXIT_CODE)
def __call__(self):
    """
    Build the Task2 pool from this object's settings and write it out.

    Every setting is read through ``self.get``. The document mappings and the
    per-modality boundaries are loaded first, the pool is constructed from
    them, and its output is written to '<output>-<batch_id>' before exiting
    with ALLOK_EXIT_CODE.
    """
    logger = self.get('logger')

    encodings = Encodings(logger, self.get('encodings'))
    core_documents = CoreDocuments(logger, self.get('core_documents'))
    document_mappings = DocumentMappings(logger,
                                         self.get('parent_children'),
                                         encodings,
                                         core_documents)

    document_boundaries = {
        'text': TextBoundaries(logger, self.get('sentence_boundaries')),
        'image': ImageBoundaries(logger, self.get('image_boundaries')),
    }
    # Video boundaries are loaded before keyframe boundaries, but 'keyframe'
    # precedes 'video' in the mapping.
    video_boundaries = VideoBoundaries(logger, self.get('video_boundaries'))
    document_boundaries['keyframe'] = KeyFrameBoundaries(logger, self.get('keyframe_boundaries'))
    document_boundaries['video'] = video_boundaries

    pool_settings = {
        'document_mappings': document_mappings,
        'document_boundaries': document_boundaries,
        'runs_to_pool_file': self.get('runs_to_pool'),
        'queries_to_pool_file': self.get('queries'),
        'max_kit_size': self.get('kit_size'),
        'batch_id': self.get('batch_id'),
        'input_dir': self.get('input'),
        'previous_pools': self.get('previous_pools'),
    }
    task2_pool = Task2Pool(logger, **pool_settings)

    destination = '{}-{}'.format(self.get('output'), self.get('batch_id'))
    task2_pool.write_output(destination)
    exit(ALLOK_EXIT_CODE)
def score_submission(args):
    """
    Score a system run against the gold responses and print the scores.

    Loads the ontology type mappings, slot mappings, document mappings and
    per-modality boundaries from the files named in ``args``, builds the gold
    and system response sets, loads the cluster alignment and cluster
    self-similarities, hands everything to the scores manager, prints the
    scores to ``args.scores`` and exits with ALLOK_EXIT_CODE.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    document_mappings = DocumentMappings(logger, args.parent_children, encodings, core_documents)

    # Boundary loaders paired with their input files, in load order.
    boundary_sources = (
        (TextBoundaries, args.sentence_boundaries),
        (ImageBoundaries, args.image_boundaries),
        (VideoBoundaries, args.video_boundaries),
        (KeyFrameBoundaries, args.keyframe_boundaries),
    )
    text, image, video, keyframe = (
        loader(logger, filename) for loader, filename in boundary_sources
    )
    document_boundaries = dict(text=text, image=image, keyframe=keyframe, video=video)

    # Both response sets are built from the same leading arguments.
    shared = (logger, ontology_type_mappings, slot_mappings, document_mappings, document_boundaries)
    gold_responses = ResponseSet(*shared, args.gold, 'gold')
    system_responses = ResponseSet(*shared, args.system, args.runid)

    cluster_alignment = ClusterAlignment(logger, args.alignment)
    cluster_self_similarities = ClusterSelfSimilarities(logger, args.similarities)

    scores_manager = ScoresManager(logger,
                                   gold_responses,
                                   system_responses,
                                   cluster_alignment,
                                   cluster_self_similarities,
                                   args.separator)
    scores_manager.print_scores(args.scores)
    exit(ALLOK_EXIT_CODE)
def align_clusters(args):
    """
    Align gold and system clusters for every '.ttl' directory under ``args.gold``.

    Loads the document mappings, per-modality boundaries and annotated regions
    from the files named in ``args``, creates the ``args.similarities`` and
    ``args.alignment`` directories, and then, for each sub-directory of
    ``args.gold`` whose name ends in '.ttl', builds the clusters from the
    gold/system mention and edge files and prints the similarities and the
    alignment to '<document_id>.tab' files in those two directories. Exits
    with ALLOK_EXIT_CODE when done.

    ``os.mkdir`` raises if either output directory already exists.
    """
    kb_suffix = '.ttl'

    def existing_or_none(path):
        # A missing query-result file is handed to Clusters as None.
        return path if os.path.exists(path) else None

    logger = Logger(args.log, args.log_specifications, sys.argv)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    annotated_regions = AnnotatedRegions(logger, document_mappings, document_boundaries, args.regions)

    os.mkdir(args.similarities)
    os.mkdir(args.alignment)

    for entry in sorted(os.scandir(args.gold), key=str):
        if not (entry.is_dir() and entry.name.endswith(kb_suffix)):
            continue

        kb = entry.name
        message = 'aligning clusters in {}'.format(entry.name)
        logger.record_event('DEFAULT_INFO', message)
        print('At {}: {}'.format(time.strftime("%m/%d/%Y %H:%M:%S", time.localtime()), message))

        # Strip only the trailing extension; str.replace would also remove
        # any '.ttl' occurring earlier in the directory name.
        document_id = kb[:-len(kb_suffix)]

        gold_mentions = existing_or_none('{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(args.gold, kb))
        gold_edges = existing_or_none('{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(args.gold, kb))
        system_mentions = existing_or_none('{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(args.system, kb))
        system_edges = existing_or_none('{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(args.system, kb))

        similarities = '{}/{}.tab'.format(args.similarities, document_id)
        alignment = '{}/{}.tab'.format(args.alignment, document_id)
        check_for_paths_non_existance([similarities, alignment])

        clusters = Clusters(logger,
                            document_mappings,
                            document_boundaries,
                            annotated_regions,
                            gold_mentions,
                            gold_edges,
                            system_mentions,
                            system_edges)
        clusters.print_similarities(similarities)
        clusters.print_alignment(alignment)

    exit(ALLOK_EXIT_CODE)
def filter_responses(args):
    """
    Run the response filter over a run and write the entries that pass it.

    Loads the ontology type mappings, slot mappings, document mappings,
    per-modality boundaries, responses and annotated regions from the files
    named in ``args``, and calls ``run_filter_on_all_responses``. For every
    input file in the response set an output file is written under
    ``args.output`` (the responses' 'path' prefix is replaced by
    ``args.output``). Each output file starts with the header line of its
    first entry, followed by every valid entry whose 'passes_filter' value is
    truthy, in numeric line order. Invalid entries are logged as
    'EXPECTING_VALID_ENTRY' and skipped. Exits with ALLOK_EXIT_CODE when done.

    ``os.mkdir(args.output)`` raises if the output directory already exists.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    responses = ResponseSet(logger,
                            ontology_type_mappings,
                            slot_mappings,
                            document_mappings,
                            document_boundaries,
                            args.input,
                            args.runid)
    annotated_regions = AnnotatedRegions(logger, document_mappings, document_boundaries, args.regions)
    run_filter_on_all_responses(responses, annotated_regions, document_mappings, document_boundaries)

    os.mkdir(args.output)
    for input_filename in responses:
        output_filename = input_filename.replace(responses.get('path'), args.output)
        # makedirs also creates missing intermediate directories and tolerates
        # a directory created for an earlier file.
        os.makedirs(os.path.dirname(output_filename), exist_ok=True)

        file_entries = responses.get(input_filename)
        # The context manager closes the file even if writing an entry raises.
        with open(output_filename, 'w') as output_fh:
            header_printed = False
            for linenum in sorted(file_entries, key=int):
                entry = file_entries.get(str(linenum))
                if not header_printed:
                    # The header is taken from the first entry, valid or not.
                    output_fh.write('{}\n'.format(entry.get('header').get('line')))
                    header_printed = True
                if not entry.get('valid'):
                    logger.record_event('EXPECTING_VALID_ENTRY', entry.get('where'))
                    continue
                if entry.get('passes_filter'):
                    output_fh.write(str(entry))

    exit(ALLOK_EXIT_CODE)
def validate_responses(args):
    """
    Validate the responses of a run and write out the valid ones.

    Loads the ontology type mappings, slot mappings, document mappings and
    per-modality boundaries from the files named in ``args``, builds the
    response set for ``args.input`` / ``args.runid``, writes the valid
    responses to ``args.output`` and exits with ALLOK_EXIT_CODE.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    document_mappings = DocumentMappings(logger, args.parent_children, encodings, core_documents)

    document_boundaries = {
        'text': TextBoundaries(logger, args.sentence_boundaries),
        'image': ImageBoundaries(logger, args.image_boundaries),
    }
    # Video boundaries are loaded before keyframe boundaries, but 'keyframe'
    # precedes 'video' in the mapping.
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    document_boundaries['keyframe'] = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries['video'] = video_boundaries

    response_set = ResponseSet(logger,
                               ontology_type_mappings,
                               slot_mappings,
                               document_mappings,
                               document_boundaries,
                               args.input,
                               args.runid)
    response_set.write_valid_responses(args.output)
    exit(ALLOK_EXIT_CODE)
def main(args):
    """
    Convert the entries of ``args.input`` into a sorted, de-duplicated table.

    Loads the type mappings ('full_type_ov' -> 'full_type') and the
    per-modality boundaries from the files named in ``args``. Each input entry
    becomes one record holding its document id, document element id, keyframe
    id/number, modality, region string and mapped type. The region is derived
    from the entry's 'span' value:

    * 'ENTIRE_DOCUMENT_ELEMENT': the string form of the boundary looked up for
      the document element in the entry's modality;
    * a value containing '-': 'start-end' rewritten as '(start,0)-(end,0)';
    * a value containing '_': treated as a keyframe id, whose boundary is
      looked up among the keyframe boundaries;
    * anything else: None.

    The records are ordered with ``multisort``, rendered with ``get_line`` and
    written tab-separated to ``args.output`` below a header line, skipping
    lines that were already written. Exits with ALLOK_EXIT_CODE when done.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.ontology_type_mappings):
        type_mappings.add(key=entry.get('full_type_ov'), value=entry.get('full_type'))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    output = []
    for entry in FileHandler(logger, args.input):
        document_id = entry.get('root_doc_id')
        document_element_id = entry.get('doc_element_id')
        modality = entry.get('media_type')
        # Named entry_type so the builtin `type` is not shadowed.
        entry_type = entry.get('type')
        subtype = entry.get('subtype')
        subsubtype = entry.get('subsubtype')
        full_type = '{type}.{subtype}.{subsubtype}'.format(
            type=entry_type, subtype=subtype, subsubtype=subsubtype)
        full_type_cleaned = full_type.replace('.unspecified', '')
        propercased_full_type = type_mappings.get(full_type_cleaned, None)

        span_string = entry.get('span')
        keyframe_id = None
        keyframe_num = 0
        if span_string == 'ENTIRE_DOCUMENT_ELEMENT':
            document_boundary = document_boundaries.get(modality).get(document_element_id)
            span_string = str(document_boundary)
        elif '-' in span_string:
            start, end = span_string.split('-')
            span_string = '({start},0)-({end},0)'.format(start=start, end=end)
        elif '_' in span_string:
            keyframe_id = span_string
            # The token after the first '_' is used as the keyframe number.
            keyframe_num = span_string.split('_')[1]
            document_boundary = document_boundaries.get('keyframe').get(keyframe_id)
            span_string = str(document_boundary)
        else:
            span_string = None

        output.append({
            'document_id': document_id,
            'document_element_id': document_element_id,
            'keyframe_id': keyframe_id,
            'keyframe_num': int(keyframe_num),
            'modality': modality,
            'region': span_string,
            'type': propercased_full_type,
        })

    header = [
        'document_id', 'document_element_or_keyframe_id', 'modality', 'region', 'type'
    ]
    # NOTE(review): the False flags are presumably multisort's per-key
    # "reverse" switches - confirm against multisort's definition.
    sort_order = (('document_id', False),
                  ('modality', False),
                  ('document_element_id', False),
                  ('keyframe_num', False),
                  ('region', False),
                  ('type', False))

    printed = set()
    # The context manager closes the file even if rendering a line raises.
    with open(args.output, 'w') as fh:
        fh.write('{}\n'.format('\t'.join(header)))
        for output_object in multisort(output, sort_order):
            line = get_line(output_object, header)
            if line not in printed:
                fh.write('{}\n'.format(line))
                printed.add(line)

    exit(ALLOK_EXIT_CODE)