Example 1
def test_comm_container_fetch_handler():
    comm_container = {'one': create_comm('one'), 'two': create_comm('two')}

    impl = CommunicationContainerFetchHandler(comm_container)
    host = 'localhost'
    port = find_port()

    with SubprocessFetchCommunicationServiceWrapper(impl, host, port):
        with FetchCommunicationClientWrapper(host, port) as cli:
            assert cli.getCommunicationCount()
            ids = cli.getCommunicationIDs(0, 10)
            assert 'one' in ids
            assert 'two' in ids
            assert 'foo' not in ids
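
The snippets in this listing omit their imports. A plausible preamble for the fetch-service examples, assuming concrete-python's module layout (paths may vary by version):

# Assumed import paths; adjust to the installed concrete-python version.
from concrete.util import create_comm
from concrete.util.access import CommunicationContainerFetchHandler
from concrete.util.access_wrapper import (
    FetchCommunicationClientWrapper,
    SubprocessFetchCommunicationServiceWrapper,
)
from concrete.util.comm_container import FetchBackedCommunicationContainer
from concrete.util.net import find_port
from concrete.validate import validate_communication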
Example 2
def test_fetch_backed_container():
    comm_container = {'one': create_comm('one'), 'two': create_comm('two')}

    impl = CommunicationContainerFetchHandler(comm_container)
    host = 'localhost'
    port = find_port()

    with SubprocessFetchCommunicationServiceWrapper(impl, host, port):
        cc = FetchBackedCommunicationContainer(host, port)
        assert len(cc) == 2
        assert 'one' in cc
        assert 'two' in cc
        for comm_id in cc:
            comm = cc[comm_id]
            assert validate_communication(comm)
Example 3
def test_create_comm_complex_al_sentence():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n',
                       annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert 2 == len(comm.sectionList)

    sect = comm.sectionList[0]
    assert 2 == sect.textSpan.start
    assert 23 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 2 == sent.textSpan.start
    assert 16 == sent.textSpan.ending
    assert sent.tokenization is None
    sent = sect.sentenceList[1]
    assert 17 == sent.textSpan.start
    assert 23 == sent.textSpan.ending
    assert sent.tokenization is None

    sect = comm.sectionList[1]
    assert 25 == sect.textSpan.start
    assert 30 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 25 == sent.textSpan.start
    assert 30 == sent.textSpan.ending
    assert sent.tokenization is None

    assert validate_communication(comm)
Example 4
def test_create_comm_complex_al_none():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n',
                       annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)
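
The annotation_level argument controls how much structure create_comm builds, as the assertions in these examples show. A minimal sketch of the levels; the AL_TOKEN name (the presumed default) and the module path are assumptions about concrete-python:

# Each level adds one layer of structure on top of the previous one.
from concrete.util.simple_comm import (
    AL_NONE, AL_SECTION, AL_SENTENCE, AL_TOKEN, create_comm)

text = 'simple comm\t\t.'
comm = create_comm('c', text, annotation_level=AL_NONE)
assert comm.sectionList is None                    # no structure at all
comm = create_comm('c', text, annotation_level=AL_SECTION)
assert comm.sectionList[0].sentenceList is None    # sections only
comm = create_comm('c', text, annotation_level=AL_SENTENCE)
assert comm.sectionList[0].sentenceList[0].tokenization is None
comm = create_comm('c', text, annotation_level=AL_TOKEN)
assert comm.sectionList[0].sentenceList[0].tokenization is not None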
Example 5
def comm_with_other_tags(*additional_tagging_types):
    comm = create_comm(
        'quick', '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
''')
    for section in comm.sectionList:
        for sentence in section.sentenceList:
            sentence.tokenization.tokenTaggingList = [
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(
                        tool=u'tool',
                        timestamp=1,
                    ),
                    taggingType=u'upper',
                    taggedTokenList=[
                        TaggedToken(
                            tokenIndex=token.tokenIndex,
                            tag=token.text.upper(),
                        )
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                ),
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(
                        tool=u'tool',
                        timestamp=1,
                    ),
                    taggingType=u'lower',
                    taggedTokenList=[
                        TaggedToken(
                            tokenIndex=token.tokenIndex,
                            tag=token.text.lower(),
                        )
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                ),
            ] + [
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(
                        tool=u'tool/{}'.format(i),
                        timestamp=1,
                    ),
                    taggingType=tagging_type,
                    taggedTokenList=[
                        TaggedToken(
                            tokenIndex=token.tokenIndex,
                            tag='{}_{}/{}'.format(tagging_type,
                                                  token.tokenIndex, i),
                        )
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                ) for (i, tagging_type) in enumerate(additional_tagging_types)
            ]
    return comm
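
comm_with_other_tags presumes several concrete types and a UUID helper in scope; a plausible preamble and call, with import paths that are assumptions about concrete-python:

# concrete re-exports its Thrift types at the package root (assumed).
from concrete import AnnotationMetadata, TaggedToken, TokenTagging
from concrete.util import create_comm
from concrete.util.concrete_uuid import generate_UUID

# Yields 'upper' and 'lower' taggings plus one tagging per extra type:
comm = comm_with_other_tags('ner', 'pos')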
Example 6
def test_annotate():
    impl = NoopAnnotateCommunicationService()
    host = 'localhost'
    port = find_port()
    timeout = 5

    comm_id = '1-2-3-4'
    comm = create_comm(comm_id)

    comm_uuid_uuidString = comm.uuid.uuidString
    comm_metadata_tool = comm.metadata.tool
    comm_metadata_timestamp = comm.metadata.timestamp

    with SubprocessAnnotateCommunicationServiceWrapper(impl,
                                                       host,
                                                       port,
                                                       timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocolAccelerated(transport)

        cli = AnnotateCommunicationService.Client(protocol)
        transport.open()
        res = cli.annotate(comm)
        transport.close()

        assert res.id == comm_id
        assert res.uuid.uuidString == comm_uuid_uuidString
        assert res.metadata.tool == comm_metadata_tool
        assert res.metadata.timestamp == comm_metadata_timestamp
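
test_annotate drives the service through a raw Thrift client rather than a concrete wrapper. The Thrift imports below are standard; the concrete-side paths are assumptions:

# Standard Thrift plumbing for the framed, accelerated-compact client.
from thrift.protocol import TCompactProtocol
from thrift.transport import TSocket, TTransport

# Assumed concrete-python locations for the service and test helpers.
from concrete.annotate import AnnotateCommunicationService
from concrete.util.annotate_wrapper import (
    NoopAnnotateCommunicationService,
    SubprocessAnnotateCommunicationServiceWrapper,
)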
Example 7
def test_read_write_fixed_point():
    comm = create_comm('comm-1')
    buf_1 = write_communication_to_buffer(comm)
    buf_2 = write_communication_to_buffer(
        read_communication_from_buffer(buf_1)
    )
    assert buf_1 == buf_2
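
The round trip above relies on in-memory serialization helpers, presumably from concrete.util.mem_io; a sketch under that assumption:

# Assumed import paths; the comm id survives the round trip as well.
from concrete.util import create_comm
from concrete.util.mem_io import (
    read_communication_from_buffer, write_communication_to_buffer)

buf = write_communication_to_buffer(create_comm('comm-1'))
assert read_communication_from_buffer(buf).id == 'comm-1'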
Example 8
def index_volume():
    # Fixture-style generator: builds images and volumes, yields the index
    # volume name, then tears everything down. The docker_* and
    # force_remove_* names are suite-local helpers not shown here.
    docker_build('Dockerfile.concrete_python', 'concrete-python-image')
    docker_build('Dockerfile.index', 'indexing-image')

    docker_volume_create('data')
    docker_volume_create('index')

    zip_path = mktemp_path('.zip')
    with CommunicationWriterZip(zip_path) as writer:
        writer.write(create_comm(
            'test comm 0',
            'w1\n{}\nw1\n\nw2 w4\n'.format(make_stop_word_text())))
        writer.write(create_comm(
            'test comm 1',
            '{}'.format(make_stop_word_text())))
        writer.write(create_comm(
            'test comm 2',
            '{}\nw3\nw3 w2\n'.format(make_stop_word_text())))

    docker_run(
        'concrete-python-image', 'sleep', 'infinity',
        d=True, v=('data', '/mnt/data'), name='data-copy')
    docker_cp(zip_path, ('data-copy', '/mnt/data/comms.zip'))
    force_remove_container('data-copy')

    docker_run(
        'concrete-python-image',
        'fetch-server.py', '/mnt/data/comms.zip',
        '--host', 'localhost', '--port', '9090',
        d=True, v=('data', '/mnt/data'), name='fetch')
    wait_concrete_service('fetch', 'FetchCommunication')
    docker_run(
        'indexing-image',
        network=('container', 'fetch'), rm=True,
        v=('index', '/mnt/index'))

    yield 'index'

    force_remove_container('data-copy')
    force_remove_container('fetch')
    force_remove_volume('data')
    force_remove_volume('index')
    force_remove(zip_path)
    force_remove_image('indexing-image')
    force_remove_image('concrete-python-image')
Example 9
def test_create_comm_one_sentence_al_section():
    comm = create_comm('one', 'simple comm\t\t.', annotation_level=AL_SECTION)
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 14 == sect.textSpan.ending
    assert sect.sentenceList is None
    assert validate_communication(comm)
Example 10
def test_lattice_with_token_list_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    lattice_path = LatticePath()
    lattice_path.tokenList = [Token(tokenIndex=0, text='mambo'),
                              Token(tokenIndex=0, text='no.'),
                              Token(tokenIndex=0, text='3')]
    token_lattice = TokenLattice()
    token_lattice.cachedBestPath = lattice_path
    tokenization.lattice = token_lattice
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts
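
The point of the final assertion: create_comm builds token-list-backed tokenizations, so get_tokens returns the original tokens ('4') and the attached lattice ('3') is ignored. A sketch of that reading, assuming the Thrift TokenizationKind enum and these import paths:

# get_tokens presumably dispatches on tokenization.kind; TOKEN_LIST wins
# here, so the cached lattice path is never consulted.
from concrete import TokenizationKind
from concrete.util import create_comm
from concrete.util.tokenization import get_tokens

comm = create_comm('comm-1', 'mambo no. 4')
tokenization = comm.sectionList[0].sentenceList[0].tokenization
assert tokenization.kind == TokenizationKind.TOKEN_LIST
assert [t.text for t in get_tokens(tokenization)] == ['mambo', 'no.', '4']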
Example 11
def test_create_comm_unicode_al_sentence():
    comm = create_comm('one', u'狐狸\t\t.', annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 5 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 5 == sent.textSpan.ending
    assert sent.tokenization is None
    assert validate_communication(comm)
Example 12
def test_communication_deep_copy():
    comm1 = create_comm('a-b-c', text='foo bar baz .')
    comm2 = communication_deep_copy(comm1)
    comm3 = communication_deep_copy(comm1)
    assert_simple_comms_equal(comm1, comm2)
    assert_simple_comms_equal(comm2, comm3)
    tkzn1 = comm1.sectionList[0].sentenceList[0].tokenization
    tkzn1.tokenList.tokenList[0] = Token(text='bbq', tokenIndex=0)
    tkzn2 = comm2.sectionList[0].sentenceList[0].tokenization
    assert list(map(
        lambda t: t.text, tkzn1.tokenList.tokenList
    )) != list(map(
        lambda t: t.text, tkzn2.tokenList.tokenList
    ))
    assert_simple_comms_equal(comm2, comm3)
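
communication_deep_copy presumably behaves like a serialize/deserialize round trip, which is why mutating comm1's tokens leaves comm2 intact. A sketch of an equivalent copy built from the buffer helpers, with an assumed module path:

from concrete.util.mem_io import (
    read_communication_from_buffer, write_communication_to_buffer)

def deep_copy_via_buffer(comm):
    # Serialize then deserialize: the result shares no state with comm,
    # matching the independence asserted above.
    return read_communication_from_buffer(write_communication_to_buffer(comm))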
Example 13
def test_create_comm_complex():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n')
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert 2 == len(comm.sectionList)

    sect = comm.sectionList[0]
    assert 2 == sect.textSpan.start
    assert 23 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 2 == sent.textSpan.start
    assert 16 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 3 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'simple' == tl[0].text
    assert 'simple' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert 'comm' == tl[1].text
    assert 'comm' == comm.text[tl[1].textSpan.start:tl[1].textSpan.ending]
    assert 2 == tl[2].tokenIndex
    assert '.' == tl[2].text
    assert '.' == comm.text[tl[2].textSpan.start:tl[2].textSpan.ending]
    sent = sect.sentenceList[1]
    assert 17 == sent.textSpan.start
    assert 23 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 2 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'or' == tl[0].text
    assert 1 == tl[1].tokenIndex
    assert '...' == tl[1].text

    sect = comm.sectionList[1]
    assert 25 == sect.textSpan.start
    assert 30 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 25 == sent.textSpan.start
    assert 30 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 1 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'isit?' == tl[0].text
    assert 'isit?' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]

    assert validate_communication(comm)
Example 14
def test_print_conll_missing_tags(capsys):
    # We don't use comm_with_other_tags() here because we want to test
    # the case where:
    #   tokenization.tokenTaggingList = None
    comm = create_comm(
        'quick', '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
''')
    print_conll_style_tags_for_communication(comm, ner=True)
    (out, err) = capsys.readouterr()
    assert err == ''
    assert out.startswith('INDEX\tTOKEN\n'
                          '-----\t-----\n'
                          '1\tThe\n'
                          '2\tquick\n')
Example 15
def test_create_comm_unicode():
    comm = create_comm('one', u'狐狸\t\t.')
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 5 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 5 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 2 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert u'狐狸' == tl[0].text
    assert u'狐狸' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert '.' == tl[1].text
    assert validate_communication(comm)
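
Note the ending offset of 5 for u'狐狸\t\t.': text spans count Unicode code points, not encoded bytes. A quick check in plain Python:

text = u'狐狸\t\t.'
assert len(text) == 5                   # 2 CJK characters + 2 tabs + '.'
assert len(text.encode('utf-8')) == 9   # the UTF-8 byte length differs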
Example 16
def test_create_comm_complex_sections():
    comm = create_comm('one', '\n\n\nFOO\r\n\r\n\n\n\nBAR\n\nFU\nBAR\n\t\n\n   \n')
    assert 'one' == comm.id
    assert '\n\n\nFOO\r\n\r\n\n\n\nBAR\n\nFU\nBAR\n\t\n\n   \n' == comm.text
    assert 3 == len(comm.sectionList)

    sect = comm.sectionList[0]
    assert 3 == sect.textSpan.start
    assert 6 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)

    sect = comm.sectionList[1]
    assert 13 == sect.textSpan.start
    assert 16 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)

    sect = comm.sectionList[2]
    assert 18 == sect.textSpan.start
    assert 24 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)
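
The section spans above can be checked by hand: create_comm splits sections on blank lines, and each asserted span slices back out of the raw text:

# Worked offsets for test_create_comm_complex_sections.
text = '\n\n\nFOO\r\n\r\n\n\n\nBAR\n\nFU\nBAR\n\t\n\n   \n'
assert text[3:6] == 'FOO'
assert text[13:16] == 'BAR'
assert text[18:24] == 'FU\nBAR'   # third section holds two sentences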
Example 17
def test_create_comm_one_sentence():
    comm = create_comm('one', 'simple comm\t\t.')
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 14 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 14 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 3 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'simple' == tl[0].text
    assert 'simple' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert 'comm' == tl[1].text
    assert 'comm' == comm.text[tl[1].textSpan.start:tl[1].textSpan.ending]
    assert 2 == tl[2].tokenIndex
    assert '.' == tl[2].text
    assert '.' == comm.text[tl[2].textSpan.start:tl[2].textSpan.ending]
    assert validate_communication(comm)
Example 18
def test_create_comm_one_sentence_al_none():
    comm = create_comm('one', 'simple comm\t\t.', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)
Example 19
def test_create_comm_empty():
    comm = create_comm('one')
    assert 'one' == comm.id
    assert '' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)
Example 20
def test_no_lattice_with_no_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts
Example 21
def test_create_comm_ws_al_sentence():
    comm = create_comm('one', '\t \t\r\n\n', annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)
Example 22
def test_create_comm_unicode_al_none():
    comm = create_comm('one', u'狐狸\t\t.', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)
Example 23
def test_create_comm_ws_al_none():
    comm = create_comm('one', '\t \t\r\n\n', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)
Example 24
def test_create_comm_empty_al_section():
    comm = create_comm('one', annotation_level=AL_SECTION)
    assert 'one' == comm.id
    assert '' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)
Example 25
def test_create_comm_ws():
    comm = create_comm('one', '\t \t\r\n\n')
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)
Example 26
def comm_id_and_buf(request):
    comm_id = 'temp comm'
    return (comm_id, write_communication_to_buffer(create_comm(comm_id)))
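
comm_id_and_buf takes pytest's request object, so it is presumably used as a fixture; a minimal sketch of that assumption together with a hypothetical consumer:

import pytest

from concrete.util import create_comm
from concrete.util.mem_io import (
    read_communication_from_buffer, write_communication_to_buffer)

@pytest.fixture
def comm_id_and_buf(request):
    comm_id = 'temp comm'
    return (comm_id, write_communication_to_buffer(create_comm(comm_id)))

def test_buffer_preserves_id(comm_id_and_buf):
    # Hypothetical consumer: the id survives the write/read round trip.
    comm_id, buf = comm_id_and_buf
    assert read_communication_from_buffer(buf).id == comm_id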