def test_print_conll_other_tags_ignore_all(capsys):
    """Without tag options, only the INDEX and TOKEN columns are printed."""
    communication = comm_with_other_tags()
    print_conll_style_tags_for_communication(communication)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\n',
        '-----\t-----\n',
        '1\tThe\n',
        '2\tquick\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_ending_tags(capsys):
    """``endings=True`` adds an ENDING column after the TOKEN column."""
    communication = comm_with_other_tags()
    print_conll_style_tags_for_communication(communication, endings=True)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tENDING\n',
        '-----\t-----\t------\n',
        '1\tThe\t3\n',
        '2\tquick\t9\n',
        '3\tbrown\t15\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_start_tags(capsys):
    """``starts=True`` adds a START column after the TOKEN column."""
    communication = comm_with_other_tags()
    print_conll_style_tags_for_communication(communication, starts=True)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tSTART\n',
        '-----\t-----\t-----\n',
        '1\tThe\t0\n',
        '2\tquick\t4\n',
        '3\tbrown\t10\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_other_tags_ignore_some(capsys):
    """Requesting only the ``upper`` tagging prints a single ``upper`` column."""
    communication = comm_with_other_tags()
    requested_tags = {'upper': None}
    print_conll_style_tags_for_communication(
        communication, other_tags=requested_tags)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tupper\n',
        '-----\t-----\t-----\n',
        '1\tThe\tTHE\n',
        '2\tquick\tQUICK\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A row that appears later in the output, not part of the prefix.
    assert '3\tshe\tSHE\n' in stdout_text
def test_print_conll_other_tags_ignore_all(capsys):
    """Check the default output: an INDEX/TOKEN header followed by token rows."""
    # Default call: no optional columns are requested.
    print_conll_style_tags_for_communication(comm_with_other_tags())
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\n' '-----\t-----\n'
    first_rows = '1\tThe\n' '2\tquick\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
def test_print_conll_other_tags_ignore_some(capsys):
    """Check that ``other_tags`` with only ``upper`` yields one ``upper`` column."""
    print_conll_style_tags_for_communication(
        comm_with_other_tags(),
        other_tags={'upper': None},
    )
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\tupper\n' '-----\t-----\t-----\n'
    first_rows = '1\tThe\tTHE\n' '2\tquick\tQUICK\n'
    later_row = '3\tshe\tSHE\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
    assert later_row in printed
def test_print_conll_other_tags_repeated_ner(capsys):
    """Two ``ner`` taggings produce two NER columns ahead of ``upper``."""
    communication = comm_with_other_tags('ner', 'ner')
    requested_tags = {'upper': None}
    print_conll_style_tags_for_communication(
        communication, ner=True, other_tags=requested_tags)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tNER\tNER\tupper\n',
        '-----\t-----\t---\t---\t-----\n',
        '1\tThe\tner_0/0\tner_0/1\tTHE\n',
        '2\tquick\tner_1/0\tner_1/1\tQUICK\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A row that appears later in the output, not part of the prefix.
    assert '3\tshe\tner_2/0\tner_2/1\tSHE\n' in stdout_text
def test_print_conll_other_tags_repeated_other_tag_filtered(capsys):
    """A filter callable for ``upper`` keeps only the 'tool/0' annotations."""

    def keep_tool_zero(anns):
        # Select the annotations whose metadata names the tool 'tool/0'.
        return filter(lambda ann: ann.metadata.tool == 'tool/0', anns)

    communication = comm_with_other_tags('upper', 'ner')
    print_conll_style_tags_for_communication(
        communication, ner=True, other_tags={'upper': keep_tool_zero})

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tNER\tupper\n',
        '-----\t-----\t---\t-----\n',
        '1\tThe\tner_0/1\tupper_0/0\n',
        '2\tquick\tner_1/1\tupper_1/0\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
    # A row that appears later in the output, not part of the prefix.
    assert '3\tshe\tner_2/1\tupper_2/0\n' in stdout_text
def test_print_conll_missing_char_offsets(capsys):
    """Tokens without a text span print an empty CHAR column."""
    communication = comm_with_other_tags()

    # Clear the text span of every token, tokenization by tokenization.
    all_tokens = (
        tok
        for tkz in get_tokenizations(communication)
        for tok in get_tokens(tkz)
    )
    for tok in all_tokens:
        tok.textSpan = None

    print_conll_style_tags_for_communication(communication, char_offsets=True)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\tCHAR\n',
        '-----\t-----\t----\n',
        '1\tThe\t\n',
        '2\tquick\t\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_start_tags(capsys):
    """Check the START column emitted when ``starts=True`` is passed."""
    print_conll_style_tags_for_communication(
        comm_with_other_tags(),
        starts=True,
    )
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\tSTART\n' '-----\t-----\t-----\n'
    first_rows = '1\tThe\t0\n' '2\tquick\t4\n' '3\tbrown\t10\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
def test_print_conll_ending_tags(capsys):
    """Check the ENDING column emitted when ``endings=True`` is passed."""
    print_conll_style_tags_for_communication(
        comm_with_other_tags(),
        endings=True,
    )
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\tENDING\n' '-----\t-----\t------\n'
    first_rows = '1\tThe\t3\n' '2\tquick\t9\n' '3\tbrown\t15\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
def test_print_conll_other_tags_repeated_ner(capsys):
    """Check that repeated ``ner`` taggings each get their own NER column."""
    print_conll_style_tags_for_communication(
        comm_with_other_tags('ner', 'ner'),
        ner=True,
        other_tags={'upper': None},
    )
    captured = capsys.readouterr()
    printed, errors = captured

    header = (
        'INDEX\tTOKEN\tNER\tNER\tupper\n'
        '-----\t-----\t---\t---\t-----\n'
    )
    first_rows = (
        '1\tThe\tner_0/0\tner_0/1\tTHE\n'
        '2\tquick\tner_1/0\tner_1/1\tQUICK\n'
    )
    later_row = '3\tshe\tner_2/0\tner_2/1\tSHE\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
    assert later_row in printed
def test_print_conll_missing_char_offsets(capsys):
    """Check that the CHAR column is blank once every token's span is removed."""
    comm = comm_with_other_tags()
    # Strip the text span from each token before printing.
    spanless_tokens = (
        token
        for tokenization in get_tokenizations(comm)
        for token in get_tokens(tokenization)
    )
    for token in spanless_tokens:
        token.textSpan = None

    print_conll_style_tags_for_communication(comm, char_offsets=True)
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\tCHAR\n' '-----\t-----\t----\n'
    first_rows = '1\tThe\t\n' '2\tquick\t\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
def test_print_conll_other_tags_repeated_other_tag_filtered(capsys):
    """Check that a filter callable picks which ``upper`` tagging is printed."""

    def from_tool_zero(ann):
        # True for annotations whose metadata names the tool 'tool/0'.
        return ann.metadata.tool == 'tool/0'

    def select_upper(anns):
        return filter(from_tool_zero, anns)

    print_conll_style_tags_for_communication(
        comm_with_other_tags('upper', 'ner'),
        ner=True,
        other_tags={'upper': select_upper},
    )
    captured = capsys.readouterr()
    printed, errors = captured

    header = (
        'INDEX\tTOKEN\tNER\tupper\n'
        '-----\t-----\t---\t-----\n'
    )
    first_rows = (
        '1\tThe\tner_0/1\tupper_0/0\n'
        '2\tquick\tner_1/1\tupper_1/0\n'
    )
    later_row = '3\tshe\tner_2/1\tupper_2/0\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
    assert later_row in printed
def test_print_conll_missing_tags(capsys):
    """With ``ner=True`` but no taggings present, no NER column is printed."""
    # comm_with_other_tags() is deliberately not used here: this test
    # targets the case where
    #   tokenization.TokenTaggingList = None
    # NOTE(review): the line breaks inside this literal were not recoverable
    # from the flattened source; confirm against the original fixture text.
    raw_text = '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
'''
    communication = create_comm('quick', raw_text)
    print_conll_style_tags_for_communication(communication, ner=True)

    stdout_text, stderr_text = capsys.readouterr()
    expected_prefix = ''.join([
        'INDEX\tTOKEN\n',
        '-----\t-----\n',
        '1\tThe\n',
        '2\tquick\n',
    ])

    assert stderr_text == ''
    assert stdout_text.startswith(expected_prefix)
def test_print_conll_missing_tags(capsys):
    """Check that asking for NER on an untagged communication prints no NER column."""
    # We build the communication directly rather than through
    # comm_with_other_tags() so that the tested case is:
    #   tokenization.TokenTaggingList = None
    # NOTE(review): the line breaks inside this literal were not recoverable
    # from the flattened source; confirm against the original fixture text.
    comm = create_comm(
        'quick',
        '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
''',
    )
    print_conll_style_tags_for_communication(comm, ner=True)
    captured = capsys.readouterr()
    printed, errors = captured

    header = 'INDEX\tTOKEN\n' '-----\t-----\n'
    first_rows = '1\tThe\n' '2\tquick\n'

    assert errors == ''
    assert printed.startswith(header + first_rows)
def test_print_conll_char_offsets(capsys):
    """Print CoNLL-style output with ``char_offsets=True`` and check the header.

    The test previously requested ``capsys`` but never read it, so it could
    only fail if the call raised. It now verifies that nothing is written to
    stderr and that the output begins with the INDEX/TOKEN/CHAR header.
    """
    print_conll_style_tags_for_communication(
        comm_with_other_tags(), char_offsets=True)
    (out, err) = capsys.readouterr()
    # As in the sibling tests, printing must not emit anything on stderr.
    assert err == ''
    # Same header that test_print_conll_missing_char_offsets expects for
    # char_offsets=True. The per-token CHAR values are not asserted because
    # the expected text is not established anywhere in this file's view.
    assert out.startswith(
        'INDEX\tTOKEN\tCHAR\n'
        '-----\t-----\t----\n'
    )
def test_print_conll_char_offsets(capsys):
    """Check stderr and the header of the output when ``char_offsets=True``.

    Without reading ``capsys`` the test asserted nothing about what was
    printed; it now checks that stderr is empty and that stdout starts with
    the INDEX/TOKEN/CHAR header.
    """
    print_conll_style_tags_for_communication(comm_with_other_tags(),
                                             char_offsets=True)
    (out, err) = capsys.readouterr()
    # No diagnostics are expected on stderr, matching the other tests here.
    assert err == ''
    # Header taken from test_print_conll_missing_char_offsets, which uses the
    # same char_offsets=True flag; token rows are intentionally not asserted
    # because their CHAR values are not shown in this file's view.
    assert out.startswith('INDEX\tTOKEN\tCHAR\n'
                          '-----\t-----\t----\n')