def test_union_patterns_empty_test():
    """Align a reference pattern set against a test pattern set selected
    with a score value that is presumably meant to match nothing.

    After ``align_patterns``, any pattern whose ``type`` has no same-typed
    counterpart on the other side must carry a ``type`` that compares
    equal to ``False``.
    """
    filename = 'config/i2b2_2016_track-1.conf'
    namespaces , document_data , ref_patterns = \
      args_and_configs.process_config( config_file = filename ,
                                       score_key = 'Short Name' ,
                                       score_values = [ '(Patient|Provider)' ] )
    namespaces , document_data , test_patterns = \
      args_and_configs.process_config( config_file = filename ,
                                       score_key = 'Short Name' ,
                                       score_values = [ 'I.Do.No.Exist' ] )
    ref_patterns , test_patterns = \
      args_and_configs.align_patterns( ref_patterns , test_patterns )
    ref_types = [ pattern[ 'type' ] for pattern in ref_patterns ]
    test_types = [ pattern[ 'type' ] for pattern in test_patterns ]
    ## The equality comparison against False (rather than a truthiness
    ## check) is kept on purpose so that the assertion accepts exactly
    ## the same values as before.
    for ref_type in ref_types:
        if ref_type not in test_types:
            assert ref_type == False  # noqa: E712
    for test_type in test_types:
        if test_type not in ref_types:
            assert test_type == False  # noqa: E712
Ejemplo n.º 2
0
def test_writing_dictionary_for_datetime_from_0005_gs():
    """Write the annotations extracted from ``0005_gs.xml`` to a scratch
    file and compare the ``annotations``, ``offset_mapping`` and
    ``raw_content`` sections against a pre-saved reference output.
    """
    ingest_file = 'tests/data/i2b2_2016_track-1_reference/0005_gs.xml'
    reference_file = 'tests/data/i2b2_2016_track-1_reference_out/0005_gs.xml'
    config_file = 'config/i2b2_2016_track-1.conf'
    ## Create the scratch file before entering the try block.  If
    ## mkstemp itself fails there is nothing to clean up, and the
    ## finally clause must not hit an unbound tmp_file and mask the
    ## real error with a NameError.
    tmp_descriptor, tmp_file = tempfile.mkstemp()
    try:
        os.close(tmp_descriptor)
        namespaces , document_data , patterns = \
          args_and_configs.process_config( config_file = config_file ,
                                           score_key = 'Short Name' ,
                                           score_values = [ '.*' ] )
        text_extraction.extract_annotations(ingest_file,
                                            namespaces=namespaces,
                                            document_data=document_data,
                                            patterns=patterns,
                                            skip_chars=r'[\s]',
                                            out_file=tmp_file)
        with open(reference_file, 'r') as rf:
            reloaded_reference = json.load(rf)
        with open(tmp_file, 'r') as tf:
            reloaded_test = json.load(tf)
        for section in ('annotations', 'offset_mapping', 'raw_content'):
            assert reloaded_reference[section] == reloaded_test[section]
    finally:
        os.remove(tmp_file)
Ejemplo n.º 3
0
def test_extracting_no_optional_attributes():
    """Extract ``Problem`` annotations from a WebAnno XMI file while
    passing an empty list of optional attributes.

    Each expected entry then holds only the type, the begin/end
    positions and a ``None`` raw text.
    """
    xmi_path = 'tests/data/013_Conditional_Problem.xmi'
    conf_path = 'config/webanno_problems_allergies_xmi.conf'
    namespaces, _, _ = args_and_configs.process_config(
        config_file=conf_path,
        score_key='Short Name',
        score_values=['.*'])
    extracted = text_extraction.extract_annotations_xml(
        xmi_path,
        offset_mapping={},
        annotation_path='./custom:Problems',
        tag_name='Problem',
        namespaces=namespaces,
        begin_attribute='begin',
        end_attribute='end',
        optional_attributes=[])
    first_problem = {'type': 'Problem',
                     'begin_pos': '181',
                     'end_pos': '188',
                     'raw_text': None}
    second_problem = {'type': 'Problem',
                      'begin_pos': '218',
                      'end_pos': '224',
                      'raw_text': None}
    assert extracted == {'181': [first_problem], '218': [second_problem]}
Ejemplo n.º 4
0
def test_contents_of_write_of_dictionary_for_brat_patterns():
    """Extract annotations from a brat ``.ann`` file into a temporary
    output file, then check both the reloaded JSON and the individual
    fields of the annotation starting at offset 474.

    Also verifies that the temporary file exists for the duration of the
    ``with`` block and is gone afterwards.
    """
    ingest_file = 'tests/data/brat_reference/problems_and_allergens.ann'
    config_file = 'config/brat_problems_allergies_standoff.conf'
    namespaces , document_data , patterns = \
      args_and_configs.process_config( config_file = config_file ,
                                       score_key = 'Short Name' ,
                                       score_values = [ '.*' ] )
    with tempfile.NamedTemporaryFile() as tmpfile_handle:
        assert os.path.exists(tmpfile_handle.name)
        ## The offset mapping is returned as well but is not inspected here
        _ , strict_starts = \
          text_extraction.extract_annotations( ingest_file ,
                                               namespaces = namespaces ,
                                               document_data = document_data ,
                                               patterns = patterns ,
                                               skip_chars = r'[\s]' ,
                                               out_file = tmpfile_handle.name )
        reloaded_json = json.load(tmpfile_handle)
        assert reloaded_json['annotations'] == strict_starts
        ## T34	Problem 474 493	shortness of breath
        ## A1	Negated T34
        problem = strict_starts['474'][0]
        assert problem['begin_pos'] == '474'
        assert problem['end_pos'] == '493'
        assert problem['raw_text'] == 'shortness of breath'
        assert problem['Historical'] == 'false'
        assert problem['Negated'] == 'true'
        assert os.path.exists(tmpfile_handle.name)
    ## Leaving the with block must have removed the temporary file
    assert not os.path.exists(tmpfile_handle.name)
Ejemplo n.º 5
0
def test_count_ref_set_default(capsys):
    """Run ``etude.count_ref_set`` with only ``--print-counts`` and
    ``--no-metrics`` set and compare the captured stdout against the
    expected header row and total row.

    Args:
        capsys: pytest fixture used to capture what ``count_ref_set``
            prints.
    """
    command_line_args = [
        'etude.py', '--reference-input',
        'tests/data/i2b2_2016_track-1_reference', '--reference-config',
        'config/i2b2_2016_track-1.conf', '--print-counts', '--no-metrics'
    ]
    with patch.object(sys, 'argv', command_line_args):
        args = etude.init_args()
        namespaces , document_data , patterns = \
          args_and_configs.process_config( config_file = args.reference_config ,
                                           score_key = args.score_key ,
                                           score_values = args.score_values )
        etude.count_ref_set(this_ns=namespaces,
                            this_dd=document_data,
                            this_patterns=patterns,
                            this_folder=args.reference_input,
                            args=args,
                            file_prefix=args.file_prefix,
                            file_suffix=args.file_suffix[0])
        default_out, err = capsys.readouterr()
        expected_rows = [['counts', 'n'], ['Total', '482']]
        ## Build the expected text directly (one delimited row per line)
        ## instead of printing it and reading it back through capsys.
        expected_out = '\n'.join(
            args.delim.join('{}'.format(cell) for cell in row)
            for row in expected_rows)
        assert default_out.strip() == expected_out.strip()
Ejemplo n.º 6
0
def test_count_ref_set_csv_out():
    """Run ``etude.count_ref_set`` with ``--csv-out`` pointing at a
    scratch file and compare the written CSV text against a pre-saved
    CSV fixture.
    """
    presaved_file = 'tests/data/i2b2_2016_track-1_csv_out.csv'
    ## Create the scratch file before entering the try block so that the
    ## cleanup in finally never runs against an unbound tmp_file.
    tmp_descriptor, tmp_file = tempfile.mkstemp()
    try:
        os.close(tmp_descriptor)
        command_line_args = [
            'etude.py', '--reference-input',
            'tests/data/i2b2_2016_track-1_reference', '--reference-config',
            'config/i2b2_2016_track-1.conf', '--csv-out', tmp_file,
            '--by-file', '--by-type', '--by-file-and-type',
            '--by-type-and-file', '--print-counts', '--no-metrics'
        ]
        with patch.object(sys, 'argv', command_line_args):
            args = etude.init_args()
            namespaces , document_data , patterns = \
              args_and_configs.process_config( config_file = args.reference_config ,
                                               score_key = args.score_key ,
                                               score_values = args.score_values )
            with open(presaved_file, 'r') as fp:
                reloaded_csv = fp.read()
            etude.count_ref_set(this_ns=namespaces,
                                this_dd=document_data,
                                this_patterns=patterns,
                                this_folder=args.reference_input,
                                args=args,
                                file_prefix=args.file_prefix,
                                file_suffix=args.file_suffix[0])
            with open(tmp_file, 'r') as fp:
                new_csv = fp.read()
            assert new_csv == reloaded_csv
    finally:
        os.remove(tmp_file)
def test_default_document_format():
    """The i2b2 2016 track-1 config is expected to report a document
    format of ``'Unknown'``."""
    _, document_data, _ = args_and_configs.process_config(
        config_file='config/i2b2_2016_track-1.conf',
        score_key='Short Name',
        score_values=['.*'])
    assert document_data['format'] == 'Unknown'
def test_set_score_key_Sentences():
    """Every pattern loaded from the UIMA sentences config must have the
    type ``Sentence``."""
    _, _, sentence_patterns = args_and_configs.process_config(
        config_file='config/uima_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    for entry in sentence_patterns:
        assert entry['type'] == "Sentence"
def test_plaintext_document_format():
    """The plaintext sentences config is expected to report a document
    format of ``'txt'``."""
    _, document_data, _ = args_and_configs.process_config(
        config_file='config/plaintext_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    assert document_data['format'] == 'txt'
Ejemplo n.º 10
0
def test_i2b2_2016_track_1_has_empty_namespace():
    """The namespaces returned for the i2b2 2016 track-1 config must be
    falsy."""
    namespaces, _, _ = args_and_configs.process_config(
        config_file='config/i2b2_2016_track-1.conf',
        score_key='Short Name',
        score_values=['.*'])
    ## An empty dictionary is falsy
    assert not namespaces
Ejemplo n.º 11
0
def test_skip_missing_XPath():
    """No pattern loaded from the i2b2 2016 track-1 config may carry the
    long name ``Other Person Name``."""
    _, _, loaded_patterns = args_and_configs.process_config(
        config_file='config/i2b2_2016_track-1.conf',
        score_key='Short Name',
        score_values=['.*'])
    for entry in loaded_patterns:
        assert entry['long_name'] != "Other Person Name"
Ejemplo n.º 12
0
def test_raw_content_extraction_from_plaintext():
    """The plaintext sentences config must not define any of the
    raw-content lookup keys in its document data."""
    _, document_data, _ = args_and_configs.process_config(
        config_file='config/plaintext_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    for absent_key in ('cdata_xpath', 'tag_xpath', 'content_attribute'):
        assert absent_key not in document_data
Ejemplo n.º 13
0
def test_raw_content_extraction_from_attribute():
    """The WebAnno PHI config must define a tag XPath and a content
    attribute, but no CDATA XPath, in its document data."""
    _, doc_settings, _ = args_and_configs.process_config(
        config_file='config/webanno_phi_xmi.conf',
        score_key='Short Name',
        score_values=['.*'])
    assert 'cdata_xpath' not in doc_settings
    assert doc_settings['tag_xpath'] == './cas:Sofa'
    assert doc_settings['content_attribute'] == 'sofaString'
Ejemplo n.º 14
0
def test_raw_content_extraction_from_cdata():
    """The i2b2 2016 track-1 config must define a CDATA XPath, but
    neither a tag XPath nor a content attribute, in its document data."""
    _, doc_settings, _ = args_and_configs.process_config(
        config_file='config/i2b2_2016_track-1.conf',
        score_key='Short Name',
        score_values=['.*'])
    assert doc_settings['cdata_xpath'] == './TEXT'
    for absent_key in ('tag_xpath', 'content_attribute'):
        assert absent_key not in doc_settings
Ejemplo n.º 15
0
def test_webanno_custom_namespaces():
    """The WebAnno UIMA XMI config must yield exactly one namespace
    mapping, for the ``custom`` prefix."""
    config_file = 'config/webanno_uima_xmi.conf'
    namespaces , document_data , patterns = \
      args_and_configs.process_config( config_file = config_file ,
                                       score_key = 'Short Name' ,
                                       score_values = [ '.*' ] )
    expected_namespaces = {'custom': 'http:///webanno/custom.ecore'}
    ## On a mismatch pytest already reports both dictionaries, so no
    ## separate debug dump to a hard-coded path is needed.
    assert namespaces == expected_namespaces
Ejemplo n.º 16
0
def test_set_score_key_match_over_multiple_values_Tutorial():
    """Load the CAS XMI config under three different score keys, using
    several score value regexes at once, and check the type of every
    pattern returned for each key."""
    filename = 'config/CAS_XMI.conf'
    score_values = ['^D.*e$', '^D.*n$', '^T.*e$']
    ## ( score key , type expected on every resulting pattern )
    expectations = (
        ('Short Name', "DateTime"),
        ('Parent', "Time"),
        ('Long Name', "Date and Time Information"),
    )
    for score_key, expected_type in expectations:
        _, _, matched_patterns = args_and_configs.process_config(
            config_file=filename,
            score_key=score_key,
            score_values=score_values)
        for entry in matched_patterns:
            assert entry['type'] == expected_type
Ejemplo n.º 17
0
def test_set_score_key_match_strict_start_and_end_char_Tutorial():
    """Load the CAS XMI config under three different score keys, using a
    single regex anchored at both ends, and check the type of every
    pattern returned for each key."""
    filename = 'config/CAS_XMI.conf'
    score_values = ['^[DT].*[en]$']
    ## ( score key , type expected on every resulting pattern )
    expectations = (
        ('Short Name', "DateTime"),
        ('Parent', "Time"),
        ('Long Name', "Date and Time Information"),
    )
    for score_key, expected_type in expectations:
        _, _, matched_patterns = args_and_configs.process_config(
            config_file=filename,
            score_key=score_key,
            score_values=score_values)
        for entry in matched_patterns:
            assert entry['type'] == expected_type
Ejemplo n.º 18
0
def convert_configs_to_json():
    """Parse a fixed set of config files and dump the patterns of each
    one as indented JSON under ``tests/data/``."""
    for fileroot in ('CAS_XMI', 'i2b2_2016_track-1', 'uima_sentences',
                     'webanno_uima_xmi'):
        conf_path = 'config/{}.conf'.format(fileroot)
        json_path = 'tests/data/{}.json'.format(fileroot)
        _, _, patterns = args_and_configs.process_config(
            config_file=conf_path,
            score_key='Short Name',
            score_values=['.*'])
        with open(json_path, 'w') as out_handle:
            json.dump(patterns, out_handle, indent=4)
Ejemplo n.º 19
0
def test_sentences_has_defined_namespaces():
    """The UIMA sentences config must yield exactly the ``cas``,
    ``type`` and ``type4`` namespace mappings."""
    namespaces, _, _ = args_and_configs.process_config(
        config_file='config/uima_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    expected_namespaces = dict([
        ('cas', 'http:///uima/cas.ecore'),
        ('type', 'http:///com/clinacuity/deid/nlp/uima/type.ecore'),
        ('type4',
         'http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore'),
    ])
    assert namespaces == expected_namespaces
Ejemplo n.º 20
0
def test_optional_attributes():
    """The first pattern of the WebAnno problems/allergies config must
    list all six expected optional attributes."""
    _, _, patterns = args_and_configs.process_config(
        config_file='config/webanno_problems_allergies_xmi.conf',
        score_key='Short Name',
        score_values=['.*'])
    required_attributes = ('conditional', 'generic', 'historical',
                           'negated', 'not_patient', 'uncertain')
    for attribute in required_attributes:
        assert attribute in patterns[0]['optional_attributes']
Ejemplo n.º 21
0
def test_brat_standoff_format():
    """Every pattern of the brat standoff config must be a ``Problem`` or
    an ``Allergen``, use the ``T`` type prefix, and list the six optional
    attributes in the expected order."""
    _, _, brat_patterns = args_and_configs.process_config(
        config_file='config/brat_problems_allergies_standoff.conf',
        score_key='Short Name',
        score_values=['.*'])
    for entry in brat_patterns:
        assert entry['short_name'] in ('Problem', 'Allergen')
        assert entry['type_prefix'] == 'T'
        assert entry['optional_attributes'] == [
            'Conditional',
            'Generic',
            'Historical',
            'Negated',
            'NotPatient',
            'Uncertain',
        ]
Ejemplo n.º 22
0
def test_extracting_sentences_from_CTAKES4_OpenNLP1_8():
    """Extracting ``Sentence`` annotations from the ``992321-OUT.xmi``
    fixture must produce 82 entries."""
    namespaces, _, _ = args_and_configs.process_config(
        config_file='config/uima_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    sentence_starts = text_extraction.extract_annotations_xml(
        'tests/data/sentences/992321-OUT.xmi',
        offset_mapping={},
        namespaces=namespaces,
        annotation_path='.//type:Sentence',
        tag_name='Sentence',
        begin_attribute='begin',
        end_attribute='end')
    assert len(sentence_starts) == 82
Ejemplo n.º 23
0
def test_extracting_sentences_from_0005_gs():
    """Extracting ``Sentence`` annotations from the i2b2 ``0005_gs.xml``
    fixture with the UIMA sentences config must produce an empty
    dictionary."""
    namespaces, _, _ = args_and_configs.process_config(
        config_file='config/uima_sentences.conf',
        score_key='Short Name',
        score_values=['.*'])
    sentence_starts = text_extraction.extract_annotations_xml(
        'tests/data/i2b2_2016_track-1_reference/0005_gs.xml',
        offset_mapping={},
        namespaces=namespaces,
        annotation_path='.//type:Sentence',
        tag_name='Sentence',
        begin_attribute='begin',
        end_attribute='end')
    assert sentence_starts == {}
Ejemplo n.º 24
0
def test_of_presaved_dictionary_for_complex_patterns():
    """Annotations freshly extracted from ``0005_gs.xml`` (without an
    output file) must equal the ``annotations`` section of the pre-saved
    JSON output."""
    namespaces, document_data, patterns = args_and_configs.process_config(
        config_file='config/i2b2_2016_track-1.conf',
        score_key='Short Name',
        score_values=['.*'])
    presaved_path = 'tests/data/i2b2_2016_track-1_reference_out/0005_gs.xml'
    with open(presaved_path, 'r') as presaved_handle:
        presaved = json.load(presaved_handle)
    extraction = text_extraction.extract_annotations(
        'tests/data/i2b2_2016_track-1_reference/0005_gs.xml',
        namespaces=namespaces,
        document_data=document_data,
        patterns=patterns,
        skip_chars=r'[\s]',
        out_file=None)
    ## The call returns an ( offset mapping , annotations ) pair
    _, fresh_annotations = extraction
    assert presaved['annotations'] == fresh_annotations
Ejemplo n.º 25
0
def test_count_ref_set_by_type_and_file(capsys):
    """Run ``etude.count_ref_set`` with ``--by-type`` and ``--by-file``
    set and compare the captured stdout against the expected header,
    total, per-file and per-type rows.

    Args:
        capsys: pytest fixture used to capture what ``count_ref_set``
            prints.
    """
    command_line_args = [
        'etude.py', '--reference-input',
        'tests/data/i2b2_2016_track-1_reference', '--reference-config',
        'config/i2b2_2016_track-1.conf', '--by-type', '--by-file',
        '--print-counts', '--no-metrics'
    ]
    with patch.object(sys, 'argv', command_line_args):
        args = etude.init_args()
        namespaces , document_data , patterns = \
          args_and_configs.process_config( config_file = args.reference_config ,
                                           score_key = args.score_key ,
                                           score_values = args.score_values )
        etude.count_ref_set(this_ns=namespaces,
                            this_dd=document_data,
                            this_patterns=patterns,
                            this_folder=args.reference_input,
                            args=args,
                            file_prefix=args.file_prefix,
                            file_suffix=args.file_suffix[0])
        default_out, err = capsys.readouterr()
        expected_rows = [['counts', 'n'], ['Total', '482'],
                         ['0005_gs.xml', '36'], ['0016_gs.xml', '54'],
                         ['0267_gs.xml', '63'], ['0273_gs.xml', '35'],
                         ['0389_gs.xml', '40'], ['0475_gs.xml', '46'],
                         ['0617_gs.xml', '38'], ['0709_gs.xml', '45'],
                         ['0982_gs.xml', '100'], ['0992_gs.xml', '25'],
                         ['Age', '92'], ['DateTime', '124'],
                         ['HCUnit', '76'], ['OtherGeo', '5'],
                         ['OtherID', '7'], ['OtherOrg', '21'],
                         ['Patient', '19'], ['PhoneFax', '6'],
                         ['Provider', '64'], ['SSN', '0'],
                         ['StateCountry', '33'], ['StreetCity', '29'],
                         ['Zip', '4'], ['eAddress', '2']]
        ## Build the expected text directly (one delimited row per line)
        ## instead of printing it and reading it back through capsys.
        expected_out = '\n'.join(
            args.delim.join('{}'.format(cell) for cell in row)
            for row in expected_rows)
        assert default_out.strip() == expected_out.strip()
Ejemplo n.º 26
0
def test_of_identity_read_write_of_dictionary_for_complex_patterns():
    """Extract annotations from ``0005_gs.xml`` into a temporary output
    file and check that the ``annotations`` section read back from that
    file equals the annotations returned by the call.

    Also verifies that the temporary file exists for the duration of the
    ``with`` block and is gone afterwards.
    """
    ingest_file = 'tests/data/i2b2_2016_track-1_reference/0005_gs.xml'
    config_file = 'config/i2b2_2016_track-1.conf'
    namespaces , document_data , patterns = \
      args_and_configs.process_config( config_file = config_file ,
                                       score_key = 'Short Name' ,
                                       score_values = [ '.*' ] )
    with tempfile.NamedTemporaryFile() as tmpfile_handle:
        assert os.path.exists(tmpfile_handle.name)
        ## The offset mapping is returned as well but is not inspected here
        _ , strict_starts = \
          text_extraction.extract_annotations( ingest_file ,
                                               namespaces = namespaces ,
                                               document_data = document_data ,
                                               patterns = patterns ,
                                               skip_chars = r'[\s]' ,
                                               out_file = tmpfile_handle.name )
        reloaded_json = json.load(tmpfile_handle)
        assert reloaded_json['annotations'] == strict_starts
        assert os.path.exists(tmpfile_handle.name)
    ## Leaving the with block must have removed the temporary file
    assert not os.path.exists(tmpfile_handle.name)
Ejemplo n.º 27
0
def test_empty_contents_of_write_of_dictionary_for_brat_patterns():
    """Extract annotations from the ``ibm.ann`` brat fixture, which is
    expected to yield none, and check that the written output file holds
    an empty ``annotations`` section alongside the raw document text.

    Also verifies that the temporary file exists for the duration of the
    ``with`` block and is gone afterwards.
    """
    ingest_file = 'tests/data/brat_reference/ibm.ann'
    config_file = 'config/brat_problems_allergies_standoff.conf'
    namespaces , document_data , patterns = \
      args_and_configs.process_config( config_file = config_file ,
                                       score_key = 'Short Name' ,
                                       score_values = [ '.*' ] )
    with tempfile.NamedTemporaryFile() as tmpfile_handle:
        assert os.path.exists(tmpfile_handle.name)
        ## The offset mapping is returned as well but is not inspected here
        _ , strict_starts = \
          text_extraction.extract_annotations( ingest_file ,
                                               namespaces = namespaces ,
                                               document_data = document_data ,
                                               patterns = patterns ,
                                               skip_chars = r'[\s]' ,
                                               out_file = tmpfile_handle.name )
        assert strict_starts == {}
        assert os.path.exists(tmpfile_handle.name)
        with open(tmpfile_handle.name, 'r') as rf:
            reloaded_out_file = json.load(rf)
        assert reloaded_out_file["annotations"] == {}
        assert reloaded_out_file[
            "raw_content"] == "International Business Machines Corporation: IBM is Big Blue\n"
    ## Leaving the with block must have removed the temporary file
    assert not os.path.exists(tmpfile_handle.name)
Ejemplo n.º 28
0
def test_extracting_with_and_without_optional_attributes_called_by_parent():
    """Run ``extract_annotations`` twice on the same XMI file: first with
    the optional attributes of the first pattern as configured, then with
    that list emptied.

    Each result must equal its own expected dictionary and differ from
    the other one.
    """
    def _problem(begin, end, **optional):
        ## One expected annotation: the four base fields plus any
        ## optional attribute values handed in by keyword.
        record = {'type': 'Problem',
                  'begin_pos': begin,
                  'end_pos': end,
                  'raw_text': None}
        record.update(optional)
        return record

    ingest_file = 'tests/data/013_Conditional_Problem.xmi'
    namespaces, document_data, patterns = args_and_configs.process_config(
        config_file='config/webanno_problems_allergies_xmi.conf',
        score_key='Short Name',
        score_values=['.*'])
    ## Drop the last pattern before extracting
    patterns.pop()
    _, annots_with_opt_attributes = text_extraction.extract_annotations(
        ingest_file,
        namespaces=namespaces,
        document_data=document_data,
        patterns=patterns,
        skip_chars=None,
        out_file=None)
    ## Second pass: same pattern, but with no optional attributes requested
    patterns[0]['optional_attributes'] = []
    _, annots_without_opt_attributes = text_extraction.extract_annotations(
        ingest_file,
        namespaces=namespaces,
        document_data=document_data,
        patterns=patterns,
        skip_chars=None,
        out_file=None)
    expected_without = {
        '181': [_problem('181', '188')],
        '218': [_problem('218', '224')],
    }
    expected_with = {
        '181': [_problem('181', '188',
                         conditional='true',
                         generic='false',
                         historical='false',
                         negated='false',
                         not_patient='true',
                         uncertain='false')],
        '218': [_problem('218', '224',
                         conditional='false',
                         generic='false',
                         historical='true',
                         negated='false',
                         not_patient='false',
                         uncertain='true')],
    }
    assert annots_with_opt_attributes == expected_with
    assert annots_without_opt_attributes == expected_without
    assert annots_with_opt_attributes != expected_without
    assert annots_without_opt_attributes != expected_with
Ejemplo n.º 29
0
            args.test_config, args.test_input, args.test_out, args.score_key,
            args.fuzzy_flags
        ])
    return args


if __name__ == "__main__":
    ##
    args = init_args()
    ## Extract and process the two input file configs
    if (args.reference_input):
        try:
            reference_ns , reference_dd , reference_patterns = \
              args_and_configs.process_config( config_file = args.reference_config ,
                                               score_key = args.score_key ,
                                               score_values = args.score_values ,
                                               collapse_all_patterns = args.collapse_all_patterns ,
                                               verbose = args.verbose )
        except:
            e = sys.exc_info()[0]
            log.error(
                'Uncaught exception in process_config for reference config:  {}'
                .format(e))
        if (reference_patterns == []):
            log.error(
                'No reference patterns extracted from config.  Bailing out now.'
            )
            exit(1)
    if (args.test_input):
        try:
            test_ns , test_dd , test_patterns = \
Ejemplo n.º 30
0
def test_extracting_with_and_without_optional_attributes():
    """Run ``extract_annotations_xml`` twice on the same XMI file: first
    with an empty optional attribute list, then with the optional
    attributes of the first configured pattern.

    Each result must equal its own expected dictionary and differ from
    the other one.
    """
    ingest_file = 'tests/data/013_Conditional_Problem.xmi'
    namespaces, _, patterns = args_and_configs.process_config(
        config_file='config/webanno_problems_allergies_xmi.conf',
        score_key='Short Name',
        score_values=['.*'])

    def _extract(optional_attributes):
        ## Both calls differ only in the optional attributes; a fresh
        ## offset mapping is handed in each time.
        return text_extraction.extract_annotations_xml(
            ingest_file,
            offset_mapping={},
            annotation_path='./custom:Problems',
            tag_name='Problem',
            namespaces=namespaces,
            begin_attribute='begin',
            end_attribute='end',
            optional_attributes=optional_attributes)

    strict_starts_no_opt_attributes = _extract([])
    strict_starts_with_opt_attributes = \
        _extract(patterns[0]['optional_attributes'])

    plain_181 = {'type': 'Problem', 'begin_pos': '181',
                 'end_pos': '188', 'raw_text': None}
    plain_218 = {'type': 'Problem', 'begin_pos': '218',
                 'end_pos': '224', 'raw_text': None}
    ## The attributed records are the plain ones plus six flag values
    flagged_181 = dict(plain_181,
                       conditional='true',
                       generic='false',
                       historical='false',
                       negated='false',
                       not_patient='true',
                       uncertain='false')
    flagged_218 = dict(plain_218,
                       conditional='false',
                       generic='false',
                       historical='true',
                       negated='false',
                       not_patient='false',
                       uncertain='true')
    expected_plain = {'181': [plain_181], '218': [plain_218]}
    expected_flagged = {'181': [flagged_181], '218': [flagged_218]}

    assert strict_starts_no_opt_attributes == expected_plain
    assert strict_starts_with_opt_attributes == expected_flagged
    assert strict_starts_no_opt_attributes != expected_flagged
    assert strict_starts_with_opt_attributes != expected_plain