コード例 #1
0
def align_clusters(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    annotated_regions = AnnotatedRegions(logger, document_mappings,
                                         document_boundaries, args.regions)

    os.mkdir(args.similarities)
    os.mkdir(args.alignment)
    for entry in sorted(os.scandir(args.gold), key=str):
        if entry.is_dir() and entry.name.endswith('.ttl'):
            kb = entry.name
            message = 'aligning clusters in {}'.format(entry.name)
            logger.record_event('DEFAULT_INFO', message)
            print('At {}: {}'.format(
                time.strftime("%m/%d/%Y %H:%M:%S", time.localtime()), message))
            document_id = kb.replace('.ttl', '')

            gold_mentions = '{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(
                args.gold, kb)
            gold_edges = '{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(
                args.gold, kb)
            system_mentions = '{}/{}/AIDA_P2_TA1_CM_A0001.rq.tsv'.format(
                args.system, kb)
            system_edges = '{}/{}/AIDA_P2_TA1_AM_A0001.rq.tsv'.format(
                args.system, kb)

            gold_mentions = gold_mentions if os.path.exists(
                gold_mentions) else None
            gold_edges = gold_edges if os.path.exists(gold_edges) else None
            system_mentions = system_mentions if os.path.exists(
                system_mentions) else None
            system_edges = system_edges if os.path.exists(
                system_edges) else None

            similarities = '{}/{}.tab'.format(args.similarities, document_id)
            alignment = '{}/{}.tab'.format(args.alignment, document_id)
            check_for_paths_non_existance([similarities, alignment])
            clusters = Clusters(logger, document_mappings, document_boundaries,
                                annotated_regions, gold_mentions, gold_edges,
                                system_mentions, system_edges)
            clusters.print_similarities(similarities)
            clusters.print_alignment(alignment)
    exit(ALLOK_EXIT_CODE)
コード例 #2
0
def filter_responses(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    ontology_type_mappings = OntologyTypeMappings(logger,
                                                  args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    responses = ResponseSet(logger, ontology_type_mappings, slot_mappings,
                            document_mappings, document_boundaries, args.input,
                            args.runid)
    annotated_regions = AnnotatedRegions(logger, document_mappings,
                                         document_boundaries, args.regions)
    run_filter_on_all_responses(responses, annotated_regions,
                                document_mappings, document_boundaries)

    os.mkdir(args.output)
    for input_filename in responses:
        output_filename = input_filename.replace(responses.get('path'),
                                                 args.output)
        dirname = os.path.dirname(output_filename)
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        output_fh = open(output_filename, 'w')
        header_printed = False
        for linenum in sorted(responses.get(input_filename), key=int):
            entry = responses.get(input_filename).get(str(linenum))
            if not header_printed:
                output_fh.write('{}\n'.format(entry.get('header').get('line')))
                header_printed = True
            if not entry.get('valid'):
                logger.record_event('EXPECTING_VALID_ENTRY',
                                    entry.get('where'))
                continue
            if entry.get('passes_filter'):
                output_fh.write(entry.__str__())
        output_fh.close()
    exit(ALLOK_EXIT_CODE)
コード例 #3
0
ファイル: score_submission.py プロジェクト: dangiankit/aida
def score_submission(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    ontology_type_mappings = OntologyTypeMappings(logger,
                                                  args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger, args.parent_children, Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

    annotated_regions = AnnotatedRegions(logger, ontology_type_mappings,
                                         document_mappings,
                                         document_boundaries, args.regions)

    gold_responses = ResponseSet(logger, ontology_type_mappings, slot_mappings,
                                 document_mappings, document_boundaries,
                                 args.gold, 'gold')
    system_responses = ResponseSet(logger, ontology_type_mappings,
                                   slot_mappings, document_mappings,
                                   document_boundaries, args.system,
                                   args.runid)
    cluster_alignment = ClusterAlignment(logger, args.alignment)
    cluster_self_similarities = ClusterSelfSimilarities(
        logger, args.similarities)
    scores = ScoresManager(logger, annotated_regions, gold_responses,
                           system_responses, cluster_alignment,
                           cluster_self_similarities, args.separator)
    scores.print_scores(args.scores)
    exit(ALLOK_EXIT_CODE)