Example 1
0
def test_lattice_with_token_list_kind():
    """A cached lattice best path must not shadow an explicit token list.

    The tokenization built by create_comm carries a token list, so
    get_tokens() should yield the original tokens (ending in '4'), not
    the lattice-path tokens (ending in '3').
    """
    comm = create_comm('comm-1', 'mambo no. 4')
    tkz = comm.sectionList[0].sentenceList[0].tokenization
    best_path = LatticePath()
    # Decoy tokens: same prefix, but last word differs from the real text.
    best_path.tokenList = [Token(tokenIndex=0, text=word)
                           for word in ('mambo', 'no.', '3')]
    lattice = TokenLattice()
    lattice.cachedBestPath = best_path
    tkz.lattice = lattice
    assert [tok.text for tok in get_tokens(tkz)] == ['mambo', 'no.', '4']
Example 2
0
def test_print_conll_missing_char_offsets(capsys):
    """The CHAR column is left blank for tokens that have no textSpan."""
    comm = comm_with_other_tags()
    # Strip character offsets from every token in the communication.
    for tkz in get_tokenizations(comm):
        for tok in get_tokens(tkz):
            tok.textSpan = None
    print_conll_style_tags_for_communication(comm, char_offsets=True)
    out, err = capsys.readouterr()
    assert err == ''
    header = 'INDEX\tTOKEN\tCHAR\n-----\t-----\t----\n'
    # First two data rows end with an empty CHAR field.
    first_rows = '1\tThe\t\n2\tquick\t\n'
    assert out.startswith(header + first_rows)
Example 3
0
 def test_lattice_with_token_list_kind(self):
     """Lattice best path must not override the tokenization's token list."""
     comm = create_comm('comm-1', 'mambo no. 4')
     tkz = comm.sectionList[0].sentenceList[0].tokenization
     path = LatticePath()
     # Decoy path: final token '3' differs from the real final token '4'.
     path.tokenList = [Token(tokenIndex=0, text=t)
                       for t in ('mambo', 'no.', '3')]
     lattice = TokenLattice()
     lattice.cachedBestPath = path
     tkz.lattice = lattice
     self.assertEqual(['mambo', 'no.', '4'],
                      [tok.text for tok in get_tokens(tkz)])
Example 4
0
def test_print_conll_missing_char_offsets(capsys):
    """Tokens lacking a textSpan should produce empty CHAR columns."""
    comm = comm_with_other_tags()
    for tokenization in get_tokenizations(comm):
        for token in get_tokens(tokenization):
            # Remove character offsets so the CHAR field has no value.
            token.textSpan = None
    print_conll_style_tags_for_communication(comm, char_offsets=True)
    out, err = capsys.readouterr()
    assert err == ''
    expected_prefix = (
        'INDEX\tTOKEN\tCHAR\n'
        '-----\t-----\t----\n'
        '1\tThe\t\n'
        '2\tquick\t\n'
    )
    assert out.startswith(expected_prefix)
Example 5
0
 def test_no_lattice_with_no_kind(self):
     """With no lattice attached, tokens come straight from the token list."""
     comm = create_comm('comm-1', 'mambo no. 4')
     tkz = comm.sectionList[0].sentenceList[0].tokenization
     texts = [tok.text for tok in get_tokens(tkz)]
     self.assertEqual(['mambo', 'no.', '4'], texts)
Example 6
0
def test_get_tokens_invalid_kind():
    """get_tokens must raise ValueError for an unrecognized tokenization kind."""
    tokenization = Tokenization(kind='invalid-kind')
    with raises(ValueError):
        get_tokens(tokenization)
Example 7
0
def test_no_lattice_with_no_kind():
    """Tokens are read from the token list when no lattice is present."""
    comm = create_comm('comm-1', 'mambo no. 4')
    tkz = comm.sectionList[0].sentenceList[0].tokenization
    assert [tok.text for tok in get_tokens(tkz)] == ['mambo', 'no.', '4']