Ejemplo n.º 1
0
def validate_responses(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    logger.record_event('DEFAULT_INFO', 'validation started')
    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    queries = TA3QuerySet(logger, args.queries) if args.queries else None
    responses = ResponseSet(logger,
                            document_mappings,
                            document_boundaries,
                            args.input,
                            args.runid,
                            args.task,
                            queries=queries)
    responses.write_valid_responses(args.output)
    num_warnings, num_errors = logger.get_stats()
    closing_message = 'validation finished (warnings:{}, errors:{})'.format(
        num_warnings, num_errors)
    logger.record_event('DEFAULT_INFO', closing_message)
    print(closing_message)
    if num_errors > 0:
        exit(ERROR_EXIT_CODE)
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 2
0
def main(args):
    """
    The main program for generating AIF
    """
    check_paths(args)
    logger = Logger(args.log, args.log_specifications_filename, sys.argv)
    core_documents = CoreDocuments(logger, args.core_documents_filename)
    encodings = Encodings(logger, args.encodings_filename)
    document_mappings = DocumentMappings(logger, args.parent_children_filename,
                                         encodings, core_documents)
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries_filename)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries_filename)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries_filename)
    keyframe_boundaries = KeyFrameBoundaries(logger,
                                             args.keyframe_boundaries_filename)
    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.type_mappings_filename):
        type_mappings.add(key=entry.get('full_type_ov'),
                          value=entry.get('full_type'))
    slot_mappings = SlotMappings(logger, args.slot_mappings_filename)
    annotations = Annotations(logger,
                              slot_mappings,
                              document_mappings,
                              text_boundaries,
                              image_boundaries,
                              video_boundaries,
                              keyframe_boundaries,
                              type_mappings,
                              args.annotations,
                              load_video_time_offsets_flag=args.notime)
    generator = AIFGenerator(logger, annotations, args.nochannel,
                             args.reference_kb_id)
    generator.write_output(args.output)
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 3
0
 def __call__(self):
     logger = self.get('logger')
     document_mappings = DocumentMappings(
         logger, self.get('parent_children'),
         Encodings(logger, self.get('encodings')),
         CoreDocuments(logger, self.get('core_documents')))
     text_boundaries = TextBoundaries(logger,
                                      self.get('sentence_boundaries'))
     image_boundaries = ImageBoundaries(logger,
                                        self.get('image_boundaries'))
     video_boundaries = VideoBoundaries(logger,
                                        self.get('video_boundaries'))
     keyframe_boundaries = KeyFrameBoundaries(
         logger, self.get('keyframe_boundaries'))
     document_boundaries = {
         'text': text_boundaries,
         'image': image_boundaries,
         'keyframe': keyframe_boundaries,
         'video': video_boundaries
     }
     pool = Task2Pool(logger,
                      document_mappings=document_mappings,
                      document_boundaries=document_boundaries,
                      runs_to_pool_file=self.get('runs_to_pool'),
                      queries_to_pool_file=self.get('queries'),
                      max_kit_size=self.get('kit_size'),
                      batch_id=self.get('batch_id'),
                      input_dir=self.get('input'),
                      previous_pools=self.get('previous_pools'))
     pool.write_output('{}-{}'.format(self.get('output'),
                                      self.get('batch_id')))
     exit(ALLOK_EXIT_CODE)
Ejemplo n.º 4
0
def score_submission(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    ontology_type_mappings = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(logger,
                                         args.parent_children,
                                         Encodings(logger, args.encodings),
                                         CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
        }
    
    gold_responses = ResponseSet(logger, ontology_type_mappings, slot_mappings, document_mappings, document_boundaries, args.gold, 'gold')
    system_responses = ResponseSet(logger, ontology_type_mappings, slot_mappings, document_mappings, document_boundaries, args.system, args.runid)
    cluster_alignment = ClusterAlignment(logger, args.alignment)
    cluster_self_similarities = ClusterSelfSimilarities(logger, args.similarities)
    scores = ScoresManager(logger, gold_responses, system_responses, cluster_alignment, cluster_self_similarities, args.separator)
    scores.print_scores(args.scores)
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 5
0
def align_clusters(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    annotated_regions = AnnotatedRegions(logger, document_mappings,
                                         document_boundaries, args.regions)

    os.mkdir(args.similarities)
    os.mkdir(args.alignment)
    for entry in sorted(os.scandir(args.gold), key=str):
        if entry.is_dir() and entry.name.endswith('.ttl'):
            kb = entry.name
            message = 'aligning clusters in {}'.format(entry.name)
            logger.record_event('DEFAULT_INFO', message)
            print('At {}: {}'.format(
                time.strftime("%m/%d/%Y %H:%M:%S", time.localtime()), message))
            document_id = kb.replace('.ttl', '')

            gold_mentions = '{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(
                args.gold, kb)
            gold_edges = '{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(
                args.gold, kb)
            system_mentions = '{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(
                args.system, kb)
            system_edges = '{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(
                args.system, kb)

            gold_mentions = gold_mentions if os.path.exists(
                gold_mentions) else None
            gold_edges = gold_edges if os.path.exists(gold_edges) else None
            system_mentions = system_mentions if os.path.exists(
                system_mentions) else None
            system_edges = system_edges if os.path.exists(
                system_edges) else None

            similarities = '{}/{}.tab'.format(args.similarities, document_id)
            alignment = '{}/{}.tab'.format(args.alignment, document_id)
            check_for_paths_non_existance([similarities, alignment])
            clusters = Clusters(logger, document_mappings, document_boundaries,
                                annotated_regions, gold_mentions, gold_edges,
                                system_mentions, system_edges)
            clusters.print_similarities(similarities)
            clusters.print_alignment(alignment)
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 6
0
def filter_responses(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    ontology_type_mappings = OntologyTypeMappings(logger,
                                                  args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    responses = ResponseSet(logger, ontology_type_mappings, slot_mappings,
                            document_mappings, document_boundaries, args.input,
                            args.runid)
    annotated_regions = AnnotatedRegions(logger, document_mappings,
                                         document_boundaries, args.regions)
    run_filter_on_all_responses(responses, annotated_regions,
                                document_mappings, document_boundaries)

    os.mkdir(args.output)
    for input_filename in responses:
        output_filename = input_filename.replace(responses.get('path'),
                                                 args.output)
        dirname = os.path.dirname(output_filename)
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        output_fh = open(output_filename, 'w')
        header_printed = False
        for linenum in sorted(responses.get(input_filename), key=int):
            entry = responses.get(input_filename).get(str(linenum))
            if not header_printed:
                output_fh.write('{}\n'.format(entry.get('header').get('line')))
                header_printed = True
            if not entry.get('valid'):
                logger.record_event('EXPECTING_VALID_ENTRY',
                                    entry.get('where'))
                continue
            if entry.get('passes_filter'):
                output_fh.write(entry.__str__())
        output_fh.close()
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 7
0
def validate_responses(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    ontology_type_mappings = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(logger,
                                         args.parent_children,
                                         Encodings(logger, args.encodings),
                                         CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
        }

    responses = ResponseSet(logger, ontology_type_mappings, slot_mappings, document_mappings, document_boundaries, args.input, args.runid)
    responses.write_valid_responses(args.output)
    exit(ALLOK_EXIT_CODE)
Ejemplo n.º 8
0
def main(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.ontology_type_mappings):
        type_mappings.add(key=entry.get('full_type_ov'),
                          value=entry.get('full_type'))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    output = []
    for entry in FileHandler(logger, args.input):
        document_id = entry.get('root_doc_id')
        document_element_id = entry.get('doc_element_id')
        modality = entry.get('media_type')
        type = entry.get('type')
        subtype = entry.get('subtype')
        subsubtype = entry.get('subsubtype')
        full_type = '{type}.{subtype}.{subsubtype}'.format(
            type=type, subtype=subtype, subsubtype=subsubtype)
        full_type_cleaned = full_type.replace('.unspecified', '')
        propercased_full_type = type_mappings.get(full_type_cleaned, None)
        span_string = entry.get('span')
        keyframe_id = None
        keyframe_num = 0
        if span_string == 'ENTIRE_DOCUMENT_ELEMENT':
            document_boundary = document_boundaries.get(modality).get(
                document_element_id)
            span_string = document_boundary.__str__()
        elif '-' in span_string:
            start, end = span_string.split('-')
            span_string = '({start},0)-({end},0)'.format(start=start, end=end)
        elif '_' in span_string:
            keyframe_id = span_string
            keyframe_num = span_string.split('_')[1]
            document_boundary = document_boundaries.get('keyframe').get(
                keyframe_id)
            span_string = document_boundary.__str__()
        else:
            span_string = None
        output_object = {
            'document_id': document_id,
            'document_element_id': document_element_id,
            'keyframe_id': keyframe_id,
            'keyframe_num': int(keyframe_num),
            'modality': modality,
            'region': span_string,
            'type': propercased_full_type,
        }
        output.append(output_object)

    printed = {}
    fh = open(args.output, 'w')
    header = [
        'document_id', 'document_element_or_keyframe_id', 'modality', 'region',
        'type'
    ]
    fh.write('{}\n'.format('\t'.join(header)))
    for output_object in multisort(
            output, (('document_id', False), ('modality', False),
                     ('document_element_id', False), ('keyframe_num', False),
                     ('region', False), ('type', False))):
        line = get_line(output_object, header)
        if line not in printed:
            fh.write('{}\n'.format(line))
            printed[line] = 1
    fh.close()
    exit(ALLOK_EXIT_CODE)