Example #1
0
def score_submission(args):
    """Build the scoring inputs named by ``args`` and print the scores.

    Constructs the mapping, boundary, region, response, alignment and
    similarity objects from the paths held on ``args``, hands them to a
    ``ScoresManager`` and calls its ``print_scores`` with ``args.scores``.

    Args:
        args: object exposing the attributes read below (``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``slot_mappings``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``,
            ``image_boundaries``, ``video_boundaries``,
            ``keyframe_boundaries``, ``regions``, ``gold``, ``system``,
            ``runid``, ``alignment``, ``similarities``, ``separator``,
            ``scores``).

    Calls ``exit(ALLOK_EXIT_CODE)`` once the scores have been printed.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_map = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_map = SlotMappings(logger, args.slot_mappings)

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    doc_map = DocumentMappings(logger, args.parent_children, encodings,
                               core_documents)

    # Boundaries are constructed in the order text, image, video, keyframe;
    # the dictionary keeps the key order text, image, keyframe, video.
    text = TextBoundaries(logger, args.sentence_boundaries)
    image = ImageBoundaries(logger, args.image_boundaries)
    video = VideoBoundaries(logger, args.video_boundaries)
    keyframe = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    boundaries = dict(text=text, image=image, keyframe=keyframe, video=video)

    regions = AnnotatedRegions(logger, type_map, doc_map, boundaries,
                               args.regions)

    # Both response sets share every resource; only the input location and
    # the run identifier differ.
    gold = ResponseSet(logger, type_map, slot_map, doc_map, boundaries,
                       args.gold, 'gold')
    system = ResponseSet(logger, type_map, slot_map, doc_map, boundaries,
                         args.system, args.runid)

    alignment = ClusterAlignment(logger, args.alignment)
    self_similarities = ClusterSelfSimilarities(logger, args.similarities)

    manager = ScoresManager(logger, regions, gold, system, alignment,
                            self_similarities, args.separator)
    manager.print_scores(args.scores)
    exit(ALLOK_EXIT_CODE)
Example #2
0
def validate_responses(args):
    """Load a response set from ``args.input`` and write its valid responses.

    Builds the ontology, slot and document mappings plus the per-modality
    boundary objects, constructs a ``ResponseSet`` from them and calls its
    ``write_valid_responses`` with ``args.output``.

    Args:
        args: object exposing the attributes read below (``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``slot_mappings``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``,
            ``image_boundaries``, ``video_boundaries``,
            ``keyframe_boundaries``, ``input``, ``runid``, ``output``).

    Calls ``exit(ALLOK_EXIT_CODE)`` after the output has been written.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_map = OntologyTypeMappings(logger, args.ontology_type_mappings)
    slot_map = SlotMappings(logger, args.slot_mappings)

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    doc_map = DocumentMappings(logger, args.parent_children, encodings,
                               core_documents)

    # Construction order (text, image, video, keyframe) and dictionary key
    # order (text, image, keyframe, video) are both kept as they were.
    text = TextBoundaries(logger, args.sentence_boundaries)
    image = ImageBoundaries(logger, args.image_boundaries)
    video = VideoBoundaries(logger, args.video_boundaries)
    keyframe = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    boundaries = dict(text=text, image=image, keyframe=keyframe, video=video)

    response_set = ResponseSet(logger, type_map, slot_map, doc_map,
                               boundaries, args.input, args.runid)
    response_set.write_valid_responses(args.output)
    exit(ALLOK_EXIT_CODE)
Example #3
0
def validate_responses(args):
    """Load task3 responses and write them through AssessorReadableFormat.

    Records a start event, builds the document mappings and per-modality
    boundaries, optionally loads a ``TA3QuerySet`` (only when
    ``args.queries`` is truthy), constructs the ``ResponseSet`` and writes
    the ``AssessorReadableFormat`` output to ``args.output``. The warning
    and error counts reported by the logger are logged and printed.

    Args:
        args: object exposing the attributes read below (``log``,
            ``log_specifications``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``,
            ``image_boundaries``, ``video_boundaries``,
            ``keyframe_boundaries``, ``queries``, ``input``, ``runid``,
            ``max_qnode_types``, ``lenient``, ``output``).

    Calls ``exit(ERROR_EXIT_CODE)`` when the logger reports at least one
    error, otherwise ``exit(ALLOK_EXIT_CODE)``.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    logger.record_event('DEFAULT_INFO', 'validation started')

    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    doc_map = DocumentMappings(logger, args.parent_children, encodings,
                               core_documents)

    # Construction order (text, image, video, keyframe) and dictionary key
    # order (text, image, keyframe, video) are both kept as they were.
    text = TextBoundaries(logger, args.sentence_boundaries)
    image = ImageBoundaries(logger, args.image_boundaries)
    video = VideoBoundaries(logger, args.video_boundaries)
    keyframe = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    boundaries = dict(text=text, image=image, keyframe=keyframe, video=video)

    # The query set is optional; without it the responses are loaded with
    # queries=None.
    query_set = None
    if args.queries:
        query_set = TA3QuerySet(logger, args.queries)

    response_set = ResponseSet(logger, doc_map, boundaries, args.input,
                               args.runid, 'task3', queries=query_set)
    readable = AssessorReadableFormat(logger, response_set,
                                      args.max_qnode_types, args.lenient)
    readable.write_output(args.output)

    warning_count, error_count = logger.get_stats()
    summary = 'ARF generation finished (warnings:{}, errors:{})'.format(
        warning_count, error_count)
    logger.record_event('DEFAULT_INFO', summary)
    print(summary)

    exit(ERROR_EXIT_CODE if error_count > 0 else ALLOK_EXIT_CODE)
Example #4
0
def main(args):
    """Rewrite the rows of ``args.input`` as a sorted, de-duplicated TSV.

    Each input entry is turned into a row holding its document id, document
    element id, keyframe id/number, modality, region string and mapped type.
    Rows are ordered with ``multisort`` and written to ``args.output`` with
    a header line; a line that has already been written is not repeated.

    Args:
        args: object exposing the attributes read below (``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``input``,
            ``output``).

    Calls ``exit(ALLOK_EXIT_CODE)`` after the output file has been closed.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    # Lookup table keyed by 'full_type_ov' with 'full_type' as the value.
    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.ontology_type_mappings):
        type_mappings.add(key=entry.get('full_type_ov'),
                          value=entry.get('full_type'))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    output = []
    for entry in FileHandler(logger, args.input):
        document_id = entry.get('root_doc_id')
        document_element_id = entry.get('doc_element_id')
        modality = entry.get('media_type')
        # Read the three type levels directly into the format call so that
        # no local variable shadows the ``type`` builtin.
        full_type = '{type}.{subtype}.{subsubtype}'.format(
            type=entry.get('type'),
            subtype=entry.get('subtype'),
            subsubtype=entry.get('subsubtype'))
        full_type_cleaned = full_type.replace('.unspecified', '')
        propercased_full_type = type_mappings.get(full_type_cleaned, None)

        span_string = entry.get('span')
        keyframe_id = None
        keyframe_num = 0
        if span_string == 'ENTIRE_DOCUMENT_ELEMENT':
            # Whole element: use the boundary stored for this element.
            document_boundary = document_boundaries.get(modality).get(
                document_element_id)
            span_string = str(document_boundary)
        elif '-' in span_string:
            # 'start-end' offsets become '(start,0)-(end,0)'.
            start, end = span_string.split('-')
            span_string = '({start},0)-({end},0)'.format(start=start, end=end)
        elif '_' in span_string:
            # The span is a keyframe id; the text after the first '_' is
            # used as the keyframe number.
            keyframe_id = span_string
            keyframe_num = span_string.split('_')[1]
            document_boundary = document_boundaries.get('keyframe').get(
                keyframe_id)
            span_string = str(document_boundary)
        else:
            span_string = None

        output.append({
            'document_id': document_id,
            'document_element_id': document_element_id,
            'keyframe_id': keyframe_id,
            'keyframe_num': int(keyframe_num),
            'modality': modality,
            'region': span_string,
            'type': propercased_full_type,
        })

    # NOTE(review): 'document_element_or_keyframe_id' is not a key of the
    # row dictionaries built above; get_line presumably derives it - confirm.
    header = [
        'document_id', 'document_element_or_keyframe_id', 'modality', 'region',
        'type'
    ]
    sort_order = (('document_id', False), ('modality', False),
                  ('document_element_id', False), ('keyframe_num', False),
                  ('region', False), ('type', False))

    printed = set()
    # The context manager closes the file even if sorting, line building or
    # a write raises part-way through.
    with open(args.output, 'w') as fh:
        fh.write('{}\n'.format('\t'.join(header)))
        for output_object in multisort(output, sort_order):
            line = get_line(output_object, header)
            if line not in printed:
                fh.write('{}\n'.format(line))
                printed.add(line)
    exit(ALLOK_EXIT_CODE)
Example #5
0
def align_clusters(args):
    """Write similarity and alignment files for each core document's KB.

    Creates the ``args.similarities`` and ``args.alignment`` directories,
    then walks the directories under ``args.gold`` whose names end in
    ``.ttl``. For every such directory whose document is flagged
    ``is_core`` in the document mappings, a ``Clusters`` object is built
    from the gold and system mention/edge files (``None`` for any file that
    does not exist) and its similarities and alignment are printed to
    ``<document_id>.tab`` in the respective output directory.

    Args:
        args: object exposing the attributes read below (``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``parent_children``, ``encodings``, ``core_documents``,
            ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``regions``,
            ``eng_iou_threshold``, ``spa_iou_threshold``,
            ``rus_iou_threshold``, ``image_iou_threshold``,
            ``video_iou_threshold``, ``similarities``, ``alignment``,
            ``gold``, ``system``).

    Calls ``exit(ALLOK_EXIT_CODE)`` after the last directory is processed.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_map = OntologyTypeMappings(logger, args.ontology_type_mappings)
    encodings = Encodings(logger, args.encodings)
    core_documents = CoreDocuments(logger, args.core_documents)
    doc_map = DocumentMappings(logger, args.parent_children, encodings,
                               core_documents)

    # Construction order (text, image, video, keyframe) and dictionary key
    # order (text, image, keyframe, video) are both kept as they were.
    text = TextBoundaries(logger, args.sentence_boundaries)
    image = ImageBoundaries(logger, args.image_boundaries)
    video = VideoBoundaries(logger, args.video_boundaries)
    keyframe = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    boundaries = dict(text=text, image=image, keyframe=keyframe, video=video)

    regions = AnnotatedRegions(logger, type_map, doc_map, boundaries,
                               args.regions)

    iou_thresholds = {
        'ENG': args.eng_iou_threshold,
        'SPA': args.spa_iou_threshold,
        'RUS': args.rus_iou_threshold,
        'image': args.image_iou_threshold,
        'video': args.video_iou_threshold,
    }

    def existing_or_none(path):
        # A missing input file is passed on as None.
        if os.path.exists(path):
            return path
        return None

    # Gold and system trees use the same per-KB file names.
    mentions_template = '{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'
    edges_template = '{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'

    os.mkdir(args.similarities)
    os.mkdir(args.alignment)
    for entry in sorted(os.scandir(args.gold), key=str):
        if not (entry.is_dir() and entry.name.endswith('.ttl')):
            continue
        kb = entry.name
        document_id = kb.replace('.ttl', '')
        document = doc_map.get('documents').get(document_id)
        if not document.get('is_core'):
            continue

        message = 'aligning clusters in {}'.format(kb)
        logger.record_event('DEFAULT_INFO', message)
        timestamp = time.strftime("%m/%d/%Y %H:%M:%S", time.localtime())
        print('At {}: {}'.format(timestamp, message))

        gold_mentions = existing_or_none(
            mentions_template.format(args.gold, kb))
        gold_edges = existing_or_none(edges_template.format(args.gold, kb))
        system_mentions = existing_or_none(
            mentions_template.format(args.system, kb))
        system_edges = existing_or_none(
            edges_template.format(args.system, kb))

        similarities_path = '{}/{}.tab'.format(args.similarities, document_id)
        alignment_path = '{}/{}.tab'.format(args.alignment, document_id)
        check_for_paths_non_existance([similarities_path, alignment_path])

        clusters = Clusters(logger, doc_map, boundaries, regions,
                            gold_mentions, gold_edges, system_mentions,
                            system_edges, iou_thresholds)
        clusters.print_similarities(similarities_path)
        clusters.print_alignment(alignment_path)
    exit(ALLOK_EXIT_CODE)