コード例 #1
0
def test_concepts_performance(events_service, concepts_service, test_results):
    """Run the concepts processor over the pickled test events and record its scores.

    Every ``*.pickle`` file found below ``$BIOMEDICUS_TEST_DATA/concepts`` is loaded
    as an event and its ``plaintext`` document is run through a pipeline made of the
    remote ``biomedicus-concepts`` processor followed by two local ``Metrics``
    components. The ``metrics`` component uses ``umls_concepts`` as tested and
    ``gold_concepts`` as target (reported as recall); ``metrics_reverse`` swaps the
    two indices (reported as precision).

    Both scores plus the mean ``remote_call`` and ``process_method`` timings of the
    remote processor are stored in ``test_results['Concepts']``. Only recall is
    asserted on: it has to be greater than 0.6.
    """
    concepts_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'concepts'
    recall = Accuracy(name='recall', mode='any', fields=['cui'])
    precision = Accuracy(name='precision', mode='any', fields=['cui'])
    with EventsClient(address=events_service) as events:
        concepts_step = RemoteProcessor(processor_id='biomedicus-concepts',
                                        address=concepts_service)
        recall_step = LocalProcessor(
            Metrics(recall, tested='umls_concepts', target='gold_concepts'),
            component_id='metrics'
        )
        precision_step = LocalProcessor(
            Metrics(precision, tested='gold_concepts', target='umls_concepts'),
            component_id='metrics_reverse'
        )
        with Pipeline(concepts_step, recall_step, precision_step,
                      events_client=events) as pipeline:
            for pickle_path in concepts_dir.glob('**/*.pickle'):
                with PickleSerializer.file_to_event(pickle_path, client=events) as event:
                    pipeline.run(event.documents['plaintext'])

    # Reporting happens after the client and the pipeline contexts have exited.
    precision_score = precision.value
    print('Precision:', precision_score)
    recall_score = recall.value
    print('Recall:', recall_score)
    timings = pipeline.processor_timer_stats('biomedicus-concepts').timing_info
    remote_call_mean = str(timings['remote_call'].mean)
    process_method_mean = str(timings['process_method'].mean)
    test_results['Concepts'] = {
        'Precision': precision_score,
        'Recall': recall_score,
        'Remote Call Duration': remote_call_mean,
        'Process Method Duration': process_method_mean,
    }
    assert recall_score > 0.6
コード例 #2
0
def test_acronyms_performance(events_service, acronyms_service, test_results):
    """Run the acronyms processor over the JSON test events and record its scores.

    Every ``*.json`` file found below ``$BIOMEDICUS_PHI_TEST_DATA/acronyms`` is
    loaded as an event and its ``plaintext`` document is run through a pipeline of
    the remote ``biomedicus-acronyms`` processor followed by three local ``Metrics``
    components:

    * ``top_score_metrics``: ``acronyms`` tested against ``gold_acronyms``, feeding
      ``top_score_accuracy`` and ``detection_recall``.
    * ``top_score_reverse``: ``gold_acronyms`` tested against ``acronyms``, feeding
      ``detection_precision``.
    * ``all_senses_metrics``: ``all_acronym_senses`` tested against
      ``gold_acronyms``, feeding ``any_accuracy``.

    The four scores and the mean ``remote_call`` / ``process_method`` timings are
    stored in ``test_results['acronyms']``. The test asserts top-sense accuracy
    > 0.4, any-sense accuracy > 0.4 and detection recall > 0.65; detection
    precision is reported but not asserted on.
    """
    acronyms_dir = Path(os.environ['BIOMEDICUS_PHI_TEST_DATA']) / 'acronyms'
    top_score_accuracy = Accuracy(name='top_score_accuracy', fields=['expansion'])
    any_accuracy = Accuracy(name='any_accuracy', mode='any', fields=['expansion'])
    detection_recall = Accuracy(name='detection_recall', mode='location',
                                fields=['expansion'])
    detection_precision = Accuracy(name='detection_precision', mode='location',
                                   fields=['expansion'])
    with EventsClient(address=events_service) as events:
        acronyms_step = RemoteProcessor(processor_id='biomedicus-acronyms',
                                        address=acronyms_service)
        top_score_step = LocalProcessor(
            Metrics(top_score_accuracy, detection_recall,
                    tested='acronyms', target='gold_acronyms'),
            component_id='top_score_metrics',
            client=events
        )
        reverse_step = LocalProcessor(
            Metrics(detection_precision, tested='gold_acronyms', target='acronyms'),
            component_id='top_score_reverse',
            client=events
        )
        all_senses_step = LocalProcessor(
            Metrics(any_accuracy, tested='all_acronym_senses', target='gold_acronyms'),
            component_id='all_senses_metrics',
            client=events
        )
        with Pipeline(acronyms_step, top_score_step, reverse_step,
                      all_senses_step) as pipeline:
            for json_path in acronyms_dir.glob('**/*.json'):
                with JsonSerializer.file_to_event(json_path, client=events) as event:
                    pipeline.run(event.documents['plaintext'])

            # Reporting is done while the pipeline and client are still open.
            top_score = top_score_accuracy.value
            print('Top Sense Accuracy:', top_score)
            any_score = any_accuracy.value
            print('Any Sense Accuracy:', any_score)
            recall_score = detection_recall.value
            print('Detection Recall:', recall_score)
            precision_score = detection_precision.value
            print('Detection Precision:', precision_score)
            pipeline.print_times()
            timings = pipeline.processor_timer_stats('biomedicus-acronyms').timing_info
            remote_call_mean = str(timings['remote_call'].mean)
            process_method_mean = str(timings['process_method'].mean)
            test_results['acronyms'] = {
                'Top sense accuracy': top_score,
                'Any sense accuracy': any_score,
                'Detection Recall': recall_score,
                'Detection Precision': precision_score,
                'Remote Call Duration': remote_call_mean,
                'Process Method Duration': process_method_mean,
            }
            assert top_score > 0.4
            assert any_score > 0.4
            assert recall_score > 0.65
コード例 #3
0
ファイル: test_metrics.py プロジェクト: nlpie/mtap
def test_fields():
    """Check the score of ``Accuracy(fields=['x'])`` on a two-label fixture.

    The ``tested`` and ``target`` indices carry labels on the same two spans. The
    ``x`` values agree on the first span (1 vs 1) and differ on the second
    (3 vs 2); the ``y`` values differ on both. The test asserts that the
    resulting accuracy is 0.5 (within 1e-6).
    """
    tested_labels = (
        (0, 5, {'x': 1, 'y': 3}),
        (6, 10, {'x': 3, 'y': 4}),
    )
    target_labels = (
        (0, 5, {'x': 1, 'y': 5}),
        (6, 10, {'x': 2, 'y': 6}),
    )
    with Event(event_id='1') as event:
        document = event.create_document('test', 'This is some text.')
        with document.get_labeler('tested') as add_tested:
            for begin, end, values in tested_labels:
                add_tested(begin, end, **values)
        with document.get_labeler('target') as add_target:
            for begin, end, values in target_labels:
                add_target(begin, end, **values)

        accuracy = Accuracy(fields=['x'])
        Metrics(accuracy, tested='tested', target='target').process_document(
            document, params={}
        )
        expected = 0.5
        assert abs(accuracy.value - expected) < 1e-6
コード例 #4
0
ファイル: test_metrics.py プロジェクト: nlpie/mtap
def test_any():
    """Check the score of ``Accuracy(mode='any')`` on a small fixture.

    The ``tested`` index holds two labels on span (0, 5) with ``x=1`` and
    ``x=3``. The ``target`` index holds ``x=1`` on (0, 5) and ``x=2`` on (6, 10),
    a span with no tested label. The test asserts that the resulting accuracy is
    0.5 (within 1e-6).
    """
    tested_labels = (
        (0, 5, 1),
        (0, 5, 3),
    )
    target_labels = (
        (0, 5, 1),
        (6, 10, 2),
    )
    with Event(event_id='1') as event:
        document = event.create_document('test', 'This is some text.')
        with document.get_labeler('tested') as add_tested:
            for begin, end, x_value in tested_labels:
                add_tested(begin, end, x=x_value)
        with document.get_labeler('target') as add_target:
            for begin, end, x_value in target_labels:
                add_target(begin, end, x=x_value)

        accuracy = Accuracy(mode='any')
        Metrics(accuracy, tested='tested', target='target').process_document(
            document, params={}
        )
        expected = 0.5
        assert abs(accuracy.value - expected) < 1e-6
コード例 #5
0
def test_tnt_performance(events_service, pos_tags_service, test_results):
    """Run the TnT part-of-speech tagger over the pickled test events and record accuracy.

    Every ``*.pickle`` file found below ``$BIOMEDICUS_TEST_DATA/pos_tags`` is
    loaded as an event and its ``gold`` document is run through a pipeline of the
    remote ``biomedicus-tnt-tagger`` processor (called with
    ``token_index='gold_tags'``) followed by a local ``Metrics`` component that
    compares ``pos_tags`` against ``gold_tags``. The per-event accuracy reported
    by the ``metrics`` component is printed after each run.

    The overall accuracy and the mean ``remote_call`` / ``process_method`` timings
    are stored in ``test_results['TnT Pos Tagger']``; the overall accuracy has to
    be greater than 0.9.
    """
    pos_tags_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'pos_tags'
    accuracy = Accuracy()
    with EventsClient(address=events_service) as events:
        tagger_step = RemoteProcessor(processor_id='biomedicus-tnt-tagger',
                                      address=pos_tags_service,
                                      params={'token_index': 'gold_tags'})
        metrics_step = LocalProcessor(
            Metrics(accuracy, tested='pos_tags', target='gold_tags'),
            component_id='metrics'
        )
        with Pipeline(tagger_step, metrics_step, events_client=events) as pipeline:
            for pickle_path in pos_tags_dir.glob('**/*.pickle'):
                with PickleSerializer.file_to_event(pickle_path, client=events) as event:
                    run_result = pipeline.run(event.documents['gold'])
                    metrics_output = run_result.component_result('metrics').result_dict
                    print('Accuracy for event - ', event.event_id, ':',
                          metrics_output['accuracy'])

            # Reporting is done while the pipeline and client are still open.
            overall_accuracy = accuracy.value
            print('Accuracy:', overall_accuracy)
            pipeline.print_times()
            timings = pipeline.processor_timer_stats('biomedicus-tnt-tagger').timing_info
            remote_call_mean = str(timings['remote_call'].mean)
            process_method_mean = str(timings['process_method'].mean)
            test_results['TnT Pos Tagger'] = {
                'Accuracy': overall_accuracy,
                'Remote Call Duration': remote_call_mean,
                'Process Method Duration': process_method_mean,
            }
            assert overall_accuracy > 0.9
コード例 #6
0
def test_dependencies(events_service, dependencies_service, test_results):
    """Run the dependency parser over the pickled test events and record UAS / LAS.

    Every ``*.pickle`` file found below ``$BIOMEDICUS_TEST_DATA/dependencies`` is
    loaded as an event and its ``plaintext`` document is run through a pipeline of
    the remote ``biomedicus-dependencies`` processor followed by a local
    ``Metrics`` component (``accuracy``) that compares ``dependencies`` against
    ``gold_dependencies`` with two ``Accuracy`` metrics, ``UAS`` and ``LAS``,
    which use ``uas_equal`` and ``las_equal`` as their equivalence tests. The
    per-document values are printed after each run.

    The overall scores, a corpus description and the mean ``remote_call`` /
    ``process_method`` timings are stored in
    ``test_results['biomedicus-dependencies']``. This test only reports; it makes
    no assertion on the scores.
    """
    dependencies_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'dependencies'
    uas = Accuracy('UAS', equivalence_test=uas_equal)
    las = Accuracy('LAS', equivalence_test=las_equal)
    with EventsClient(address=events_service) as events:
        parser_step = RemoteProcessor(processor_id='biomedicus-dependencies',
                                      address=dependencies_service)
        accuracy_step = LocalProcessor(
            Metrics(uas, las, tested='dependencies', target='gold_dependencies'),
            component_id='accuracy',
            client=events
        )
        with Pipeline(parser_step, accuracy_step) as pipeline:
            for pickle_path in dependencies_dir.glob('**/*.pickle'):
                with PickleSerializer.file_to_event(pickle_path, client=events) as event:
                    run_result = pipeline.run(event.documents['plaintext'])
                    per_document = run_result.component_result('accuracy').result_dict
                    print('Results for document: UAS: {}. LAS: {}.'.format(
                        per_document['UAS'], per_document['LAS']))

    # Reporting happens after the client and the pipeline contexts have exited.
    uas_score = uas.value
    print('UAS:', uas_score)
    las_score = las.value
    print('LAS:', las_score)
    timings = pipeline.processor_timer_stats('biomedicus-dependencies').timing_info
    remote_call_mean = str(timings['remote_call'].mean)
    process_method_mean = str(timings['process_method'].mean)
    test_results['biomedicus-dependencies'] = {
        'UAS': uas_score,
        'LAS': las_score,
        'Corpus': "MiPACQ converted to UD from PTB test set",
        'Remote Call Duration': remote_call_mean,
        'Process Method Duration': process_method_mean,
    }
コード例 #7
0
ファイル: test_metrics.py プロジェクト: nlpie/mtap
def test_boundary_fuzz_equals():
    """Check the score of ``Accuracy(boundary_fuzz=1)`` on five span pairs.

    The ``tested`` and ``target`` indices each receive five attribute-less labels.
    In the first four pairs the start and end offsets differ by at most one
    character; in the fifth pair ((31, 39) vs (49, 50)) they differ by much more.
    The test asserts that the resulting accuracy is 0.8 (within 1e-6).
    """
    tested_spans = ((0, 5), (6, 10), (11, 20), (21, 29), (31, 39))
    target_spans = ((0, 6), (7, 10), (11, 19), (20, 30), (49, 50))
    with Event(event_id='1') as event:
        document = event.create_document('test', 'This is some text.')
        with document.get_labeler('tested') as add_tested:
            for begin, end in tested_spans:
                add_tested(begin, end)

        with document.get_labeler('target') as add_target:
            for begin, end in target_spans:
                add_target(begin, end)

        accuracy = Accuracy(boundary_fuzz=1)
        Metrics(accuracy, tested='tested', target='target').process_document(
            document, params={}
        )
        expected = 0.8
        assert abs(accuracy.value - expected) < 1e-6