Exemple #1
0
def test_print_conll_other_tags_ignore_all(capsys):
    """With no tag options requested, only INDEX and TOKEN columns print."""
    expected_prefix = (
        'INDEX\tTOKEN\n'
        '-----\t-----\n'
        '1\tThe\n'
        '2\tquick\n'
    )
    comm = comm_with_other_tags()
    print_conll_style_tags_for_communication(comm)
    stdout_text, stderr_text = capsys.readouterr()
    # Nothing should be written to stderr.
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
Exemple #2
0
def test_print_conll_ending_tags(capsys):
    """Passing ``endings=True`` adds an ENDING column after TOKEN."""
    expected_prefix = (
        'INDEX\tTOKEN\tENDING\n'
        '-----\t-----\t------\n'
        '1\tThe\t3\n'
        '2\tquick\t9\n'
        '3\tbrown\t15\n'
    )
    comm = comm_with_other_tags()
    print_conll_style_tags_for_communication(comm, endings=True)
    stdout_text, stderr_text = capsys.readouterr()
    # Nothing should be written to stderr.
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
Exemple #3
0
def test_print_conll_start_tags(capsys):
    """Passing ``starts=True`` adds a START column after TOKEN."""
    expected_prefix = (
        'INDEX\tTOKEN\tSTART\n'
        '-----\t-----\t-----\n'
        '1\tThe\t0\n'
        '2\tquick\t4\n'
        '3\tbrown\t10\n'
    )
    comm = comm_with_other_tags()
    print_conll_style_tags_for_communication(comm, starts=True)
    stdout_text, stderr_text = capsys.readouterr()
    # Nothing should be written to stderr.
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
Exemple #4
0
def test_print_conll_other_tags_ignore_some(capsys):
    """Requesting only the ``upper`` tag type prints a single extra column."""
    expected_prefix = (
        'INDEX\tTOKEN\tupper\n'
        '-----\t-----\t-----\n'
        '1\tThe\tTHE\n'
        '2\tquick\tQUICK\n'
    )
    # ``None`` as the value means no filter callable is supplied for 'upper'.
    requested_tags = {'upper': None}
    comm = comm_with_other_tags()
    print_conll_style_tags_for_communication(comm, other_tags=requested_tags)
    stdout_text, stderr_text = capsys.readouterr()
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A row from later in the output must also carry the upper tag.
    assert '3\tshe\tSHE\n' in stdout_text
def test_print_conll_other_tags_ignore_all(capsys):
    """Default call prints a header with only the INDEX and TOKEN columns."""
    header_and_first_rows = (
        'INDEX\tTOKEN\n'
        '-----\t-----\n'
        '1\tThe\n'
        '2\tquick\n'
    )
    print_conll_style_tags_for_communication(comm_with_other_tags())
    captured_out, captured_err = capsys.readouterr()
    # stderr must stay empty; stdout must begin with the expected table.
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
def test_print_conll_other_tags_ignore_some(capsys):
    """Selecting ``upper`` via ``other_tags`` yields one ``upper`` column."""
    header_and_first_rows = (
        'INDEX\tTOKEN\tupper\n'
        '-----\t-----\t-----\n'
        '1\tThe\tTHE\n'
        '2\tquick\tQUICK\n'
    )
    print_conll_style_tags_for_communication(
        comm_with_other_tags(),
        other_tags={'upper': None},
    )
    captured_out, captured_err = capsys.readouterr()
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
    # Spot-check a row that is not part of the leading prefix.
    assert '3\tshe\tSHE\n' in captured_out
Exemple #7
0
def test_print_conll_other_tags_repeated_ner(capsys):
    """Two ``ner`` taggings produce two NER columns ahead of ``upper``."""
    expected_prefix = (
        'INDEX\tTOKEN\tNER\tNER\tupper\n'
        '-----\t-----\t---\t---\t-----\n'
        '1\tThe\tner_0/0\tner_0/1\tTHE\n'
        '2\tquick\tner_1/0\tner_1/1\tQUICK\n'
    )
    comm = comm_with_other_tags('ner', 'ner')
    print_conll_style_tags_for_communication(
        comm,
        ner=True,
        other_tags={'upper': None},
    )
    stdout_text, stderr_text = capsys.readouterr()
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A later row must show both NER values and the upper tag as well.
    assert '3\tshe\tner_2/0\tner_2/1\tSHE\n' in stdout_text
Exemple #8
0
def test_print_conll_other_tags_repeated_other_tag_filtered(capsys):
    """A filter callable for ``upper`` restricts output to 'tool/0' taggings."""

    def _from_tool_zero(ann):
        # Predicate: keep annotations whose metadata names tool 'tool/0'.
        return ann.metadata.tool == 'tool/0'

    def _keep_tool_zero(anns):
        return filter(_from_tool_zero, anns)

    expected_prefix = (
        'INDEX\tTOKEN\tNER\tupper\n'
        '-----\t-----\t---\t-----\n'
        '1\tThe\tner_0/1\tupper_0/0\n'
        '2\tquick\tner_1/1\tupper_1/0\n'
    )
    comm = comm_with_other_tags('upper', 'ner')
    print_conll_style_tags_for_communication(
        comm,
        ner=True,
        other_tags={'upper': _keep_tool_zero},
    )
    stdout_text, stderr_text = capsys.readouterr()
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A later row must show the same NER/upper column pairing.
    assert '3\tshe\tner_2/1\tupper_2/0\n' in stdout_text
Exemple #9
0
def test_print_conll_missing_char_offsets(capsys):
    """Tokens with ``textSpan = None`` print an empty CHAR column value."""
    expected_prefix = (
        'INDEX\tTOKEN\tCHAR\n'
        '-----\t-----\t----\n'
        '1\tThe\t\n'
        '2\tquick\t\n'
    )
    comm = comm_with_other_tags()
    # Strip the text span from every token in every tokenization.
    for tkz in get_tokenizations(comm):
        for tok in get_tokens(tkz):
            tok.textSpan = None
    print_conll_style_tags_for_communication(comm, char_offsets=True)
    stdout_text, stderr_text = capsys.readouterr()
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_start_tags(capsys):
    """With ``starts=True`` the table gains a START column."""
    header_and_first_rows = (
        'INDEX\tTOKEN\tSTART\n'
        '-----\t-----\t-----\n'
        '1\tThe\t0\n'
        '2\tquick\t4\n'
        '3\tbrown\t10\n'
    )
    print_conll_style_tags_for_communication(
        comm_with_other_tags(), starts=True)
    captured_out, captured_err = capsys.readouterr()
    # stderr must stay empty; stdout must begin with the expected table.
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
def test_print_conll_ending_tags(capsys):
    """With ``endings=True`` the table gains an ENDING column."""
    header_and_first_rows = (
        'INDEX\tTOKEN\tENDING\n'
        '-----\t-----\t------\n'
        '1\tThe\t3\n'
        '2\tquick\t9\n'
        '3\tbrown\t15\n'
    )
    print_conll_style_tags_for_communication(
        comm_with_other_tags(), endings=True)
    captured_out, captured_err = capsys.readouterr()
    # stderr must stay empty; stdout must begin with the expected table.
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
def test_print_conll_other_tags_repeated_ner(capsys):
    """A comm built with two ``ner`` taggings prints two NER columns."""
    header_and_first_rows = (
        'INDEX\tTOKEN\tNER\tNER\tupper\n'
        '-----\t-----\t---\t---\t-----\n'
        '1\tThe\tner_0/0\tner_0/1\tTHE\n'
        '2\tquick\tner_1/0\tner_1/1\tQUICK\n'
    )
    call_options = {'ner': True, 'other_tags': {'upper': None}}
    print_conll_style_tags_for_communication(
        comm_with_other_tags('ner', 'ner'), **call_options)
    captured_out, captured_err = capsys.readouterr()
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
    # Spot-check a row that is not part of the leading prefix.
    assert '3\tshe\tner_2/0\tner_2/1\tSHE\n' in captured_out
def test_print_conll_missing_char_offsets(capsys):
    """CHAR column values are empty once every token's textSpan is None."""
    header_and_first_rows = (
        'INDEX\tTOKEN\tCHAR\n'
        '-----\t-----\t----\n'
        '1\tThe\t\n'
        '2\tquick\t\n'
    )
    spanless_comm = comm_with_other_tags()
    # Walk all tokens of all tokenizations lazily and clear their spans.
    every_token = (
        tok
        for tkz in get_tokenizations(spanless_comm)
        for tok in get_tokens(tkz)
    )
    for tok in every_token:
        tok.textSpan = None
    print_conll_style_tags_for_communication(
        spanless_comm, char_offsets=True)
    captured_out, captured_err = capsys.readouterr()
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
def test_print_conll_other_tags_repeated_other_tag_filtered(capsys):
    """Filtering ``upper`` annotations by tool 'tool/0' leaves one column."""

    def _only_tool_zero(anns):
        # Same selection as the inline lambda form: match on metadata.tool.
        return filter(lambda ann: ann.metadata.tool == 'tool/0', anns)

    header_and_first_rows = (
        'INDEX\tTOKEN\tNER\tupper\n'
        '-----\t-----\t---\t-----\n'
        '1\tThe\tner_0/1\tupper_0/0\n'
        '2\tquick\tner_1/1\tupper_1/0\n'
    )
    print_conll_style_tags_for_communication(
        comm_with_other_tags('upper', 'ner'),
        ner=True,
        other_tags={'upper': _only_tool_zero},
    )
    captured_out, captured_err = capsys.readouterr()
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
    # Spot-check a row that is not part of the leading prefix.
    assert '3\tshe\tner_2/1\tupper_2/0\n' in captured_out
Exemple #15
0
def test_print_conll_missing_tags(capsys):
    """Asking for NER on a ``create_comm`` comm prints INDEX/TOKEN only."""
    # comm_with_other_tags() is deliberately avoided: this test targets
    # the case where:
    #   tokenization.TokenTaggingList = None
    text = '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
'''
    expected_prefix = (
        'INDEX\tTOKEN\n'
        '-----\t-----\n'
        '1\tThe\n'
        '2\tquick\n'
    )
    comm = create_comm('quick', text)
    print_conll_style_tags_for_communication(comm, ner=True)
    stdout_text, stderr_text = capsys.readouterr()
    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_missing_tags(capsys):
    """``ner=True`` adds no NER column for a comm made by ``create_comm``."""
    # Built with create_comm rather than comm_with_other_tags() so that
    # the exercised case is:
    #   tokenization.TokenTaggingList = None
    raw_text = '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
'''
    untagged_comm = create_comm('quick', raw_text)
    print_conll_style_tags_for_communication(untagged_comm, ner=True)
    captured_out, captured_err = capsys.readouterr()
    header_and_first_rows = (
        'INDEX\tTOKEN\n'
        '-----\t-----\n'
        '1\tThe\n'
        '2\tquick\n'
    )
    assert captured_err == ''
    assert captured_out.startswith(header_and_first_rows)
def test_print_conll_char_offsets(capsys):
    """Print CoNLL-style tags with ``char_offsets=True`` for the sample comm.

    NOTE(review): no output assertions are visible in this block; the
    captured output is not inspected here.
    """
    comm = comm_with_other_tags()
    print_conll_style_tags_for_communication(comm, char_offsets=True)
Exemple #18
0
def test_print_conll_char_offsets(capsys):
    print_conll_style_tags_for_communication(comm_with_other_tags(),
                                             char_offsets=True)