# Imports assumed from concrete-python's public API; exact module paths may
# vary between versions.
from pytest import raises

from concrete.inspect import print_conll_style_tags_for_communication
from concrete.structure.ttypes import (
    LatticePath, Token, TokenLattice, Tokenization, TokenizationKind
)
from concrete.util.simple_comm import create_comm
from concrete.util.tokenization import get_tokens, get_tokenizations


def test_lattice_with_token_list_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    # Attach a lattice whose cached best path disagrees with the token list
    # ('3' instead of '4')...
    lattice_path = LatticePath()
    lattice_path.tokenList = [
        Token(tokenIndex=0, text='mambo'),
        Token(tokenIndex=0, text='no.'),
        Token(tokenIndex=0, text='3'),
    ]
    token_lattice = TokenLattice()
    token_lattice.cachedBestPath = lattice_path
    tokenization.lattice = token_lattice
    # ...so the assertion below shows that get_tokens ignores the lattice
    # when the tokenization's kind is TOKEN_LIST.
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts
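

# A hedged sketch, not in the original file: the complementary case for the
# test above. It assumes get_tokens honors TokenizationKind.TOKEN_LATTICE by
# reading tokens from the lattice's cachedBestPath, so the lattice text
# ('3') should win over the token list ('4').
def test_lattice_with_lattice_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    lattice_path = LatticePath()
    lattice_path.tokenList = [
        Token(tokenIndex=0, text='mambo'),
        Token(tokenIndex=0, text='no.'),
        Token(tokenIndex=0, text='3'),
    ]
    token_lattice = TokenLattice()
    token_lattice.cachedBestPath = lattice_path
    tokenization.lattice = token_lattice
    # Assumption: with the kind set explicitly, the lattice path is used.
    tokenization.kind = TokenizationKind.TOKEN_LATTICE
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '3'] == token_texts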


def test_print_conll_missing_char_offsets(capsys):
    # comm_with_other_tags() is a helper defined elsewhere in this module.
    comm_without_token_textspans = comm_with_other_tags()
    # Clear every token's text span so there are no character offsets to
    # print.
    for tokenization in get_tokenizations(comm_without_token_textspans):
        for token in get_tokens(tokenization):
            token.textSpan = None
    print_conll_style_tags_for_communication(
        comm_without_token_textspans, char_offsets=True)
    (out, err) = capsys.readouterr()
    assert err == ''
    # The CHAR column header should still appear, with an empty cell for
    # each token.
    assert out.startswith(
        'INDEX\tTOKEN\tCHAR\n'
        '-----\t-----\t----\n'
        '1\tThe\t\n'
        '2\tquick\t\n'
    )


def test_get_tokens_invalid_kind():
    with raises(ValueError):
        get_tokens(Tokenization(kind='invalid-kind'))


def test_no_lattice_with_no_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts