Beispiel #1
0
 def setUp(self):
     """
     Create three communications and, for each, the result of
     write_communication_to_buffer, and store them on the test case.
     """
     self.comm1, self.comm2, self.comm3 = (
         create_comm(comm_id)
         for comm_id in ('comm-1', 'comm-2', 'comm-3')
     )
     self.buf1, self.buf2, self.buf3 = (
         write_communication_to_buffer(comm)
         for comm in (self.comm1, self.comm2, self.comm3)
     )
Beispiel #2
0
def load(path, per_line, annotation_level):
    '''
    Yield communications built from the regular files of a tarball.

    The tarball at path is opened in tarfile stream mode and every
    regular-file member is decoded as UTF-8.  Member names are used to
    form the communication ids.

    If per_line is false, one communication is yielded per file, with
    the member name as id and the whole file content as text.

    If per_line is true, one communication is yielded per line, with id
    "<member name>/<zero-based line index>".  A single trailing newline
    at the end of the file does not start another line.  Every yielded
    text ends with a newline, which is appended even when the last line
    of the file had none.  Blank lines yield communications too.

    annotation_level is passed through to create_comm unchanged.
    '''
    with tarfile.open(path, 'r|*') as archive:
        # archive.next() returns None once the archive is exhausted.
        for member in iter(archive.next, None):
            if member.isfile():
                content = archive.extractfile(member).read().decode('utf-8')

                if not per_line:
                    yield create_comm(member.name, content,
                                      annotation_level=annotation_level)
                else:
                    if content.endswith('\n'):
                        body = content[:-1]
                    else:
                        body = content
                    for (line_no, line) in enumerate(body.split('\n')):
                        yield create_comm(u'%s/%d' % (member.name, line_no),
                                          line + u'\n',
                                          annotation_level=annotation_level)

            # Reset the member list kept by the TarFile object (presumably
            # so a large archive does not accumulate entries in memory).
            archive.members = []
Beispiel #3
0
def main():
    """
    Command-line entry point: convert a UTF-8 text file to a communication.

    The two positional arguments are the input text path and the output
    path; '-' stands for stdin or stdout respectively.
    """
    parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.set_defaults(annotation_level=AL_NONE)
    parser.add_argument(
        'text_path', type=str,
        help='Input text file path (- for stdin)',
    )
    parser.add_argument(
        'concrete_path', type=str,
        help='Output concrete file path (- for stdout)',
    )
    add_annotation_level_argparse_argument(parser)
    concrete.version.add_argparse_argument(parser)
    options = parser.parse_args()

    # '-' is mapped to the /dev/fd alias of the standard stream, which is
    # not available on Windows.
    text_path = options.text_path
    if text_path == '-':
        text_path = '/dev/fd/0'
    concrete_path = options.concrete_path
    if concrete_path == '-':
        concrete_path = '/dev/fd/1'

    with codecs.open(text_path, encoding='utf-8') as text_file:
        comm = create_comm(text_path, text_file.read(),
                           annotation_level=options.annotation_level)
        write_communication_to_file(comm, concrete_path)
Beispiel #4
0
def main():
    """
    Command-line entry point: convert a UTF-8 text file to a communication.

    The two positional arguments are the input text path and the output
    path; '-' stands for stdin or stdout respectively.  Logging is
    configured at the level named by -l/--loglevel/--log-level.
    """
    set_stdout_encoding()

    parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.set_defaults(annotation_level=AL_NONE)
    parser.add_argument(
        'text_path', type=str,
        help='Input text file path (- for stdin)',
    )
    parser.add_argument(
        'concrete_path', type=str,
        help='Output concrete file path (- for stdout)',
    )
    add_annotation_level_argparse_argument(parser)
    parser.add_argument(
        '-l', '--loglevel', '--log-level',
        default='info',
        help='Logging verbosity level threshold (to stderr)',
    )
    concrete.version.add_argparse_argument(parser)
    options = parser.parse_args()

    logging.basicConfig(
        level=options.loglevel.upper(),
        format='%(asctime)-15s %(levelname)s: %(message)s',
    )

    # '-' is mapped to the /dev/fd alias of the standard stream, which is
    # not available on Windows.
    text_path = options.text_path
    if text_path == '-':
        text_path = '/dev/fd/0'
    concrete_path = options.concrete_path
    if concrete_path == '-':
        concrete_path = '/dev/fd/1'

    with codecs.open(text_path, encoding='utf-8') as text_file:
        comm = create_comm(text_path, text_file.read(),
                           annotation_level=options.annotation_level)
        write_communication_to_file(comm, concrete_path)
Beispiel #5
0
def main():
    """
    Command-line entry point: convert a UTF-8 text file to a communication.

    The two positional arguments are the input text path and the output
    path; '-' stands for stdin or stdout respectively.
    """
    arg_parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    arg_parser.set_defaults(annotation_level=AL_NONE)
    arg_parser.add_argument('text_path', type=str,
                            help='Input text file path (- for stdin)')
    arg_parser.add_argument('concrete_path', type=str,
                            help='Output concrete file path (- for stdout)')
    add_annotation_level_argparse_argument(arg_parser)
    concrete.version.add_argparse_argument(arg_parser)
    parsed = arg_parser.parse_args()

    # The /dev/fd aliases of the standard streams do not exist on Windows.
    if parsed.text_path == '-':
        text_path = '/dev/fd/0'
    else:
        text_path = parsed.text_path
    if parsed.concrete_path == '-':
        concrete_path = '/dev/fd/1'
    else:
        concrete_path = parsed.concrete_path

    with codecs.open(text_path, encoding='utf-8') as source:
        comm = create_comm(text_path, source.read(),
                           annotation_level=parsed.annotation_level)
        write_communication_to_file(comm, concrete_path)
Beispiel #6
0
def test_create_comm_complex_al_none():
    """
    With AL_NONE, a text containing blank lines is kept verbatim and the
    communication has no section list.
    """
    text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList is None
    assert validate_communication(comm)
def test_create_comm_complex_al_section():
    """
    With AL_SECTION, a text containing blank lines yields three sections
    with the expected text spans and no sentence lists.
    """
    text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', text, annotation_level=AL_SECTION)
    assert comm.id == 'one'
    assert comm.text == text

    # Expected (start, ending) of each section's text span, in order.
    expected_spans = [(0, 0), (2, 23), (25, 31)]
    assert len(comm.sectionList) == len(expected_spans)

    for section, (start, ending) in zip(comm.sectionList, expected_spans):
        assert section.textSpan.start == start
        assert section.textSpan.ending == ending
        assert section.sentenceList is None

    assert validate_communication(comm)
 def test_read_write_fixed_point(self):
     """
     Check that writing a communication to a buffer, reading it back
     and writing it again produces an equal buffer.
     """
     comm = create_comm('comm-1')
     buf_1 = write_communication_to_buffer(comm)
     buf_2 = write_communication_to_buffer(
         read_communication_from_buffer(buf_1)
     )
     # assertEqual rather than assertEquals: the latter is a deprecated
     # alias that was removed from unittest in Python 3.12.
     self.assertEqual(buf_1, buf_2)
    def test_annotate(self):
        """
        Send a communication to a NoopAnnotator served in a subprocess
        and check that the id, UUID string and metadata tool/timestamp of
        the result equal those of the communication that was sent.
        """
        impl = NoopAnnotator()
        host = 'localhost'
        port = find_port()
        timeout = 5

        comm_id = '1-2-3-4'
        comm = create_comm(comm_id)

        # Record the expected values before the round trip.
        expected_uuid = comm.uuid.uuidString
        expected_tool = comm.metadata.tool
        expected_timestamp = comm.metadata.timestamp

        with SubprocessAnnotatorServiceWrapper(impl, host, port,
                                               timeout=timeout):
            transport = TTransport.TFramedTransport(
                TSocket.TSocket(host, port)
            )
            client = Annotator.Client(
                TCompactProtocol.TCompactProtocol(transport)
            )
            transport.open()
            result = client.annotate(comm)
            transport.close()

            self.assertEqual(result.id, comm_id)
            self.assertEqual(result.uuid.uuidString, expected_uuid)
            self.assertEqual(result.metadata.tool, expected_tool)
            self.assertEqual(result.metadata.timestamp, expected_timestamp)
Beispiel #10
0
    def test_annotate(self):
        """
        Send a communication to a NoopAnnotateCommunicationService served
        in a subprocess and check that the id, UUID string and metadata
        tool/timestamp of the result equal those of the communication sent.
        """
        impl = NoopAnnotateCommunicationService()
        host = 'localhost'
        port = find_port()
        timeout = 5

        comm_id = '1-2-3-4'
        comm = create_comm(comm_id)

        # Record the expected values before the round trip.
        expected_uuid = comm.uuid.uuidString
        expected_tool = comm.metadata.tool
        expected_timestamp = comm.metadata.timestamp

        with SubprocessAnnotateCommunicationServiceWrapper(
                impl, host, port, timeout=timeout):
            transport = TTransport.TFramedTransport(
                TSocket.TSocket(host, port)
            )
            client = AnnotateCommunicationService.Client(
                TCompactProtocol.TCompactProtocolAccelerated(transport)
            )
            transport.open()
            result = client.annotate(comm)
            transport.close()

            self.assertEqual(result.id, comm_id)
            self.assertEqual(result.uuid.uuidString, expected_uuid)
            self.assertEqual(result.metadata.tool, expected_tool)
            self.assertEqual(result.metadata.timestamp, expected_timestamp)
def test_create_comm_complex_al_none():
    """
    With AL_NONE, a text containing blank lines is kept verbatim and the
    communication has no section list.
    """
    raw_text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', raw_text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == raw_text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #12
0
def test_create_comm_one_sentence_al_section():
    """
    With AL_SECTION, a one-line text yields a single section spanning
    0-14 that has no sentence list.
    """
    text = 'simple comm\t\t.'
    comm = create_comm('one', text, annotation_level=AL_SECTION)
    assert comm.id == 'one'
    assert comm.text == text

    assert len(comm.sectionList) == 1
    section = comm.sectionList[0]
    assert section.textSpan.start == 0
    assert section.textSpan.ending == 14
    assert section.sentenceList is None

    assert validate_communication(comm)
def test_create_comm_one_sentence_al_section():
    """
    With AL_SECTION, a one-line text yields a single section spanning
    0-14 that has no sentence list.
    """
    raw_text = 'simple comm\t\t.'
    comm = create_comm('one', raw_text, annotation_level=AL_SECTION)
    assert comm.id == 'one'
    assert comm.text == raw_text

    assert len(comm.sectionList) == 1
    only_section = comm.sectionList[0]
    assert only_section.textSpan.start == 0
    assert only_section.textSpan.ending == 14
    assert only_section.sentenceList is None

    assert validate_communication(comm)
Beispiel #14
0
 def test_read_write_fixed_point(self):
     """
     Write a communication to a Redis key, read it back, write it
     again, and check that both write calls return equal values.
     """
     key = 'comm'
     comm = create_comm('comm-1')
     with RedisServer(loglevel='warning') as server:
         redis_db = Redis(port=server.port)
         # NOTE(review): the values compared below are the return values
         # of write_communication_to_redis_key; confirm that it returns
         # the serialized buffer, otherwise this comparison is vacuous.
         buf_1 = write_communication_to_redis_key(redis_db, key, comm)
         buf_2 = write_communication_to_redis_key(
             redis_db, key,
             read_communication_from_redis_key(redis_db, key)
         )
         # assertEqual rather than assertEquals: the latter is a
         # deprecated alias that was removed from unittest in Python 3.12.
         self.assertEqual(buf_1, buf_2)
def test_create_comm_complex():
    """
    Check the sections, sentences and tokens that create_comm produces
    at its default annotation level for a text containing blank lines.
    """
    text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', text)
    assert comm.id == 'one'
    assert comm.text == text

    # One entry per expected section: (section span, sentences), where
    # each sentence is (sentence span, [(tokenIndex, token text), ...]).
    expected_sections = [
        ((0, 0), []),
        ((2, 23), [
            ((2, 16), [(0, 'simple'), (1, 'comm'), (2, '.')]),
            ((17, 23), [(0, 'or'), (1, '...')]),
        ]),
        ((25, 31), [
            ((25, 30), [(0, 'isit?')]),
            ((31, 31), []),
        ]),
    ]

    assert len(comm.sectionList) == len(expected_sections)
    for sect, (sect_span, expected_sents) in zip(comm.sectionList,
                                                 expected_sections):
        assert sect.textSpan.start == sect_span[0]
        assert sect.textSpan.ending == sect_span[1]
        assert len(sect.sentenceList) == len(expected_sents)

        for sent, (sent_span, expected_tokens) in zip(sect.sentenceList,
                                                      expected_sents):
            assert sent.textSpan.start == sent_span[0]
            assert sent.textSpan.ending == sent_span[1]
            tokens = sent.tokenization.tokenList.tokenList
            observed = [(tok.tokenIndex, tok.text) for tok in tokens]
            assert observed == expected_tokens

    assert validate_communication(comm)
Beispiel #16
0
def test_create_comm_complex():
    """
    Check the sections, sentences and tokens that create_comm produces
    at its default annotation level for a text containing blank lines.
    """
    raw_text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', raw_text)
    assert comm.id == 'one'
    assert comm.text == raw_text

    # One entry per expected section: (section span, sentences), where
    # each sentence is (sentence span, [(tokenIndex, token text), ...]).
    expected_sections = [
        ((0, 0), []),
        ((2, 23), [
            ((2, 16), [(0, 'simple'), (1, 'comm'), (2, '.')]),
            ((17, 23), [(0, 'or'), (1, '...')]),
        ]),
        ((25, 31), [
            ((25, 30), [(0, 'isit?')]),
            ((31, 31), []),
        ]),
    ]

    assert len(comm.sectionList) == len(expected_sections)
    for section, (section_span, sentence_specs) in zip(comm.sectionList,
                                                       expected_sections):
        assert section.textSpan.start == section_span[0]
        assert section.textSpan.ending == section_span[1]
        assert len(section.sentenceList) == len(sentence_specs)

        for sentence, (sentence_span, token_specs) in zip(
                section.sentenceList, sentence_specs):
            assert sentence.textSpan.start == sentence_span[0]
            assert sentence.textSpan.ending == sentence_span[1]
            token_list = sentence.tokenization.tokenList.tokenList
            actual = [(tok.tokenIndex, tok.text) for tok in token_list]
            assert actual == token_specs

    assert validate_communication(comm)
Beispiel #17
0
def test_lattice_with_token_list_kind():
    """
    Attach a lattice whose cached best path ends in '3' to a
    tokenization built from 'mambo no. 4' and check that get_tokens
    still returns the tokens of the original text.
    """
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization

    best_path = LatticePath()
    best_path.tokenList = [
        Token(tokenIndex=0, text=word) for word in ('mambo', 'no.', '3')
    ]
    lattice = TokenLattice()
    lattice.cachedBestPath = best_path
    tokenization.lattice = lattice

    observed = [tok.text for tok in get_tokens(tokenization)]
    assert observed == ['mambo', 'no.', '4']
 def test_communication_deep_copy(self):
     """
     Check that communication_deep_copy returns independent copies:
     replacing a token in the original must change the original's
     token texts but leave the copies equal to each other.
     """
     comm1 = create_comm('a-b-c', text='foo bar baz .')
     comm2 = communication_deep_copy(comm1)
     comm3 = communication_deep_copy(comm1)
     self.assert_simple_comms_equal(comm1, comm2)
     self.assert_simple_comms_equal(comm2, comm3)
     tkzn1 = comm1.sectionList[0].sentenceList[0].tokenization
     tkzn1.tokenList.tokenList[0] = Token(text='bbq', tokenIndex=0)
     tkzn2 = comm2.sectionList[0].sentenceList[0].tokenization
     # Compare concrete lists.  On Python 3 map() returns iterator
     # objects, which compare by identity, so assertNotEqual on two map
     # objects would pass regardless of the token texts.
     self.assertNotEqual(
         [t.text for t in tkzn1.tokenList.tokenList],
         [t.text for t in tkzn2.tokenList.tokenList],
     )
     self.assert_simple_comms_equal(comm2, comm3)
def test_create_comm_unicode_al_sentence():
    """
    With AL_SENTENCE, a text with non-ASCII characters yields one
    section holding one sentence, both spanning 0-5, and the sentence
    has no tokenization.
    """
    text = u'狐狸\t\t.'
    comm = create_comm('one', text, annotation_level=AL_SENTENCE)
    assert comm.id == 'one'
    assert comm.text == text

    assert len(comm.sectionList) == 1
    section = comm.sectionList[0]
    assert section.textSpan.start == 0
    assert section.textSpan.ending == 5

    assert len(section.sentenceList) == 1
    sentence = section.sentenceList[0]
    assert sentence.textSpan.start == 0
    assert sentence.textSpan.ending == 5
    assert sentence.tokenization is None

    assert validate_communication(comm)
Beispiel #20
0
def test_create_comm_unicode_al_sentence():
    """
    With AL_SENTENCE, a text with non-ASCII characters yields one
    section holding one sentence, both spanning 0-5, and the sentence
    has no tokenization.
    """
    raw_text = u'狐狸\t\t.'
    comm = create_comm('one', raw_text, annotation_level=AL_SENTENCE)
    assert comm.id == 'one'
    assert comm.text == raw_text

    assert len(comm.sectionList) == 1
    only_section = comm.sectionList[0]
    assert only_section.textSpan.start == 0
    assert only_section.textSpan.ending == 5

    assert len(only_section.sentenceList) == 1
    only_sentence = only_section.sentenceList[0]
    assert only_sentence.textSpan.start == 0
    assert only_sentence.textSpan.ending == 5
    assert only_sentence.tokenization is None

    assert validate_communication(comm)
Beispiel #21
0
 def test_lattice_with_token_list_kind(self):
     """
     Attach a lattice whose cached best path ends in '3' to a
     tokenization built from 'mambo no. 4' and check that get_tokens
     still returns the tokens of the original text.
     """
     comm = create_comm('comm-1', 'mambo no. 4')
     tokenization = comm.sectionList[0].sentenceList[0].tokenization

     best_path = LatticePath()
     best_path.tokenList = [
         Token(tokenIndex=0, text=word) for word in ('mambo', 'no.', '3')
     ]
     lattice = TokenLattice()
     lattice.cachedBestPath = best_path
     tokenization.lattice = lattice

     observed = [tok.text for tok in get_tokens(tokenization)]
     self.assertEqual(['mambo', 'no.', '4'], observed)
def test_create_comm_unicode():
    """
    At the default annotation level, a text with non-ASCII characters
    yields one section with one sentence (both spanning 0-5) that is
    tokenized into two tokens.
    """
    text = u'狐狸\t\t.'
    comm = create_comm('one', text)
    assert comm.id == 'one'
    assert comm.text == text

    assert len(comm.sectionList) == 1
    section = comm.sectionList[0]
    assert section.textSpan.start == 0
    assert section.textSpan.ending == 5

    assert len(section.sentenceList) == 1
    sentence = section.sentenceList[0]
    assert sentence.textSpan.start == 0
    assert sentence.textSpan.ending == 5

    # Compare (tokenIndex, text) pairs, which also checks the token count.
    tokens = sentence.tokenization.tokenList.tokenList
    observed = [(tok.tokenIndex, tok.text) for tok in tokens]
    assert observed == [(0, u'狐狸'), (1, '.')]

    assert validate_communication(comm)
Beispiel #23
0
def test_create_comm_unicode():
    """
    At the default annotation level, a text with non-ASCII characters
    yields one section with one sentence (both spanning 0-5) that is
    tokenized into two tokens.
    """
    raw_text = u'狐狸\t\t.'
    comm = create_comm('one', raw_text)
    assert comm.id == 'one'
    assert comm.text == raw_text

    assert len(comm.sectionList) == 1
    only_section = comm.sectionList[0]
    assert only_section.textSpan.start == 0
    assert only_section.textSpan.ending == 5

    assert len(only_section.sentenceList) == 1
    only_sentence = only_section.sentenceList[0]
    assert only_sentence.textSpan.start == 0
    assert only_sentence.textSpan.ending == 5

    # Compare (tokenIndex, text) pairs, which also checks the token count.
    token_list = only_sentence.tokenization.tokenList.tokenList
    actual = [(tok.tokenIndex, tok.text) for tok in token_list]
    assert actual == [(0, u'狐狸'), (1, '.')]

    assert validate_communication(comm)
def test_create_comm_one_sentence():
    """
    At the default annotation level, a one-line text yields one section
    with one sentence (both spanning 0-14) tokenized into three tokens.
    """
    text = 'simple comm\t\t.'
    comm = create_comm('one', text)
    assert comm.id == 'one'
    assert comm.text == text

    assert len(comm.sectionList) == 1
    section = comm.sectionList[0]
    assert section.textSpan.start == 0
    assert section.textSpan.ending == 14

    assert len(section.sentenceList) == 1
    sentence = section.sentenceList[0]
    assert sentence.textSpan.start == 0
    assert sentence.textSpan.ending == 14

    # Compare (tokenIndex, text) pairs, which also checks the token count.
    tokens = sentence.tokenization.tokenList.tokenList
    observed = [(tok.tokenIndex, tok.text) for tok in tokens]
    assert observed == [(0, 'simple'), (1, 'comm'), (2, '.')]

    assert validate_communication(comm)
Beispiel #25
0
def test_create_comm_one_sentence():
    """
    At the default annotation level, a one-line text yields one section
    with one sentence (both spanning 0-14) tokenized into three tokens.
    """
    raw_text = 'simple comm\t\t.'
    comm = create_comm('one', raw_text)
    assert comm.id == 'one'
    assert comm.text == raw_text

    assert len(comm.sectionList) == 1
    only_section = comm.sectionList[0]
    assert only_section.textSpan.start == 0
    assert only_section.textSpan.ending == 14

    assert len(only_section.sentenceList) == 1
    only_sentence = only_section.sentenceList[0]
    assert only_sentence.textSpan.start == 0
    assert only_sentence.textSpan.ending == 14

    # Compare (tokenIndex, text) pairs, which also checks the token count.
    token_list = only_sentence.tokenization.tokenList.tokenList
    actual = [(tok.tokenIndex, tok.text) for tok in token_list]
    assert actual == [(0, 'simple'), (1, 'comm'), (2, '.')]

    assert validate_communication(comm)
Beispiel #26
0
def test_create_comm_complex_al_sentence():
    """
    With AL_SENTENCE, a text containing blank lines yields three
    sections with the expected sentence spans and no tokenizations.
    """
    text = '\n\nsimple comm\t\t.\nor ...\n\nisit?\n'
    comm = create_comm('one', text, annotation_level=AL_SENTENCE)
    assert comm.id == 'one'
    assert comm.text == text

    # One entry per expected section: (section span, [sentence spans]).
    expected_sections = [
        ((0, 0), []),
        ((2, 23), [(2, 16), (17, 23)]),
        ((25, 31), [(25, 30), (31, 31)]),
    ]

    assert len(comm.sectionList) == len(expected_sections)
    for sect, (sect_span, sent_spans) in zip(comm.sectionList,
                                             expected_sections):
        assert sect.textSpan.start == sect_span[0]
        assert sect.textSpan.ending == sect_span[1]
        assert len(sect.sentenceList) == len(sent_spans)

        for sent, (start, ending) in zip(sect.sentenceList, sent_spans):
            assert sent.textSpan.start == start
            assert sent.textSpan.ending == ending
            assert sent.tokenization is None

    assert validate_communication(comm)
Beispiel #27
0
def main():
    """
    Command-line entry point: convert a UTF-8 text file to a communication.

    The two positional arguments are the input text path and the output
    path; '-' stands for stdin or stdout respectively.  Logging is
    configured at the level named by -l/--loglevel/--log-level.
    """
    set_stdout_encoding()

    arg_parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    arg_parser.set_defaults(annotation_level=AL_NONE)
    arg_parser.add_argument('text_path', type=str,
                            help='Input text file path (- for stdin)')
    arg_parser.add_argument('concrete_path', type=str,
                            help='Output concrete file path (- for stdout)')
    add_annotation_level_argparse_argument(arg_parser)
    arg_parser.add_argument(
        '-l', '--loglevel', '--log-level',
        default='info',
        help='Logging verbosity level threshold (to stderr)',
    )
    concrete.version.add_argparse_argument(arg_parser)
    parsed = arg_parser.parse_args()

    logging.basicConfig(
        level=parsed.loglevel.upper(),
        format='%(asctime)-15s %(levelname)s: %(message)s',
    )

    # The /dev/fd aliases of the standard streams do not exist on Windows.
    if parsed.text_path == '-':
        text_path = '/dev/fd/0'
    else:
        text_path = parsed.text_path
    if parsed.concrete_path == '-':
        concrete_path = '/dev/fd/1'
    else:
        concrete_path = parsed.concrete_path

    with codecs.open(text_path, encoding='utf-8') as source:
        comm = create_comm(text_path, source.read(),
                           annotation_level=parsed.annotation_level)
        write_communication_to_file(comm, concrete_path)
Beispiel #28
0
def test_no_lattice_with_no_kind():
    """
    Check that get_tokens on the first sentence's tokenization of
    'mambo no. 4' returns the three tokens of that text.
    """
    comm = create_comm('comm-1', 'mambo no. 4')
    first_sentence = comm.sectionList[0].sentenceList[0]
    observed = [tok.text for tok in get_tokens(first_sentence.tokenization)]
    assert observed == ['mambo', 'no.', '4']
Beispiel #29
0
This Python script is secretly a shell script.
"""

import os

from concrete.util import write_communication_to_file
from concrete.util.simple_comm import create_comm

# Text shared by all three generated communications.
text = 'Super simple sentence .'

# Output file names of the three communication files.
n1 = 'simple_1.concrete'
n2 = 'simple_2.concrete'
n3 = 'simple_3.concrete'

for _comm_id, _path in (('one', n1), ('two', n2), ('three', n3)):
    write_communication_to_file(create_comm(_comm_id, text), _path)

# Shell commands, run in order, that derive compressed, concatenated,
# archived and nested-directory variants of the files written above.
_all_three = (n1, n2, n3)
_commands = (
    'gzip < %s > %s.gz' % (n1, n1),
    'bzip2 < %s > %s.bz2' % (n1, n1),
    'cat %s %s %s > simple_concatenated' % _all_three,
    'gzip < simple_concatenated > simple_concatenated.gz',
    'bzip2 < simple_concatenated > simple_concatenated.bz2',
    'tar -cf simple.tar %s %s %s' % _all_three,
    'tar -czf simple.tar.gz %s %s %s' % _all_three,
    'tar -cjf simple.tar.bz2 %s %s %s' % _all_three,
    'zip simple.zip %s %s %s' % _all_three,
    'mkdir -p a/b a/c',
    'cp %s a/b/' % n1,
    'cp %s %s a/c/' % (n2, n3),
)
for _command in _commands:
    os.system(_command)
Beispiel #30
0
def test_create_comm_ws_al_none():
    """
    With AL_NONE, a whitespace-only text is kept verbatim and the
    communication has no section list.
    """
    text = '\t \t\r\n\n'
    comm = create_comm('one', text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #31
0
 def test_no_lattice_with_no_kind(self):
     """
     Check that get_tokens on the first sentence's tokenization of
     'mambo no. 4' returns the three tokens of that text.
     """
     comm = create_comm('comm-1', 'mambo no. 4')
     first_sentence = comm.sectionList[0].sentenceList[0]
     observed = [
         tok.text for tok in get_tokens(first_sentence.tokenization)
     ]
     self.assertEqual(['mambo', 'no.', '4'], observed)
Beispiel #32
0
def _add_comm_to_list(sleep, port, comm_id, key):
    """
    Sleep for `sleep` seconds, then create a communication with id
    `comm_id`, write it to a buffer and lpush that buffer onto `key` of
    the Redis instance reached through `port`.
    """
    time.sleep(sleep)
    connection = Redis(port=port)
    payload = write_communication_to_buffer(create_comm(comm_id))
    connection.lpush(key, payload)
def test_create_comm_ws_al_none():
    """
    With AL_NONE, a whitespace-only text is kept verbatim and the
    communication has no section list.
    """
    whitespace = '\t \t\r\n\n'
    comm = create_comm('one', whitespace, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == whitespace
    assert comm.sectionList is None
    assert validate_communication(comm)
def test_create_comm_ws_al_sentence():
    """
    With AL_SENTENCE, a whitespace-only text is kept verbatim and the
    section list is empty.
    """
    text = '\t \t\r\n\n'
    comm = create_comm('one', text, annotation_level=AL_SENTENCE)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList == []
    assert validate_communication(comm)
Beispiel #35
0
def test_create_comm_one_sentence_al_none():
    """
    With AL_NONE, a one-line text is kept verbatim and the communication
    has no section list.
    """
    text = 'simple comm\t\t.'
    comm = create_comm('one', text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #36
0
def test_create_comm_unicode_al_none():
    """
    With AL_NONE, a text with non-ASCII characters is kept verbatim and
    the communication has no section list.
    """
    text = u'狐狸\t\t.'
    comm = create_comm('one', text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #37
0
def test_create_comm_empty_al_section():
    """
    With AL_SECTION and no text argument, the communication has empty
    text and an empty section list.
    """
    comm = create_comm('one', annotation_level=AL_SECTION)
    assert comm.id == 'one'
    assert comm.text == ''
    assert comm.sectionList == []
    assert validate_communication(comm)
def test_create_comm_empty_al_section():
    """
    With AL_SECTION and no text argument, the communication has empty
    text and an empty section list.
    """
    empty_comm = create_comm('one', annotation_level=AL_SECTION)
    assert empty_comm.id == 'one'
    assert empty_comm.text == ''
    assert empty_comm.sectionList == []
    assert validate_communication(empty_comm)
Beispiel #39
0
This Python script is secretly a shell script.
"""

import os

from concrete.util import write_communication_to_file
from concrete.util.simple_comm import create_comm

# Text shared by all three generated communications.
text = 'Super simple sentence .'

# Output file names of the three communication files.
n1 = 'simple_1.concrete'
n2 = 'simple_2.concrete'
n3 = 'simple_3.concrete'

for _comm_id, _path in (('one', n1), ('two', n2), ('three', n3)):
    write_communication_to_file(create_comm(_comm_id, text), _path)

# Shell commands, run in order, that derive compressed, concatenated,
# archived and nested-directory variants of the files written above.
_all_three = (n1, n2, n3)
_commands = (
    'gzip < %s > %s.gz' % (n1, n1),
    'bzip2 < %s > %s.bz2' % (n1, n1),
    'cat %s %s %s > simple_concatenated' % _all_three,
    'gzip < simple_concatenated > simple_concatenated.gz',
    'bzip2 < simple_concatenated > simple_concatenated.bz2',
    'tar -cf simple.tar %s %s %s' % _all_three,
    'tar -czf simple.tar.gz %s %s %s' % _all_three,
    'tar -cjf simple.tar.bz2 %s %s %s' % _all_three,
    'zip simple.zip %s %s %s' % _all_three,
    'mkdir -p a/b a/c',
    'cp %s a/b/' % n1,
    'cp %s %s a/c/' % (n2, n3),
)
for _command in _commands:
    os.system(_command)
def test_create_comm_empty():
    """
    With no text argument and the default annotation level, the
    communication has empty text and an empty section list.
    """
    comm = create_comm('one')
    assert comm.id == 'one'
    assert comm.text == ''
    assert comm.sectionList == []
    assert validate_communication(comm)
def test_create_comm_unicode_al_none():
    """
    With AL_NONE, a text with non-ASCII characters is kept verbatim and
    the communication has no section list.
    """
    raw_text = u'狐狸\t\t.'
    comm = create_comm('one', raw_text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == raw_text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #42
0
def test_create_comm_ws_al_sentence():
    """
    With AL_SENTENCE, a whitespace-only text is kept verbatim and the
    section list is empty.
    """
    whitespace = '\t \t\r\n\n'
    comm = create_comm('one', whitespace, annotation_level=AL_SENTENCE)
    assert comm.id == 'one'
    assert comm.text == whitespace
    assert comm.sectionList == []
    assert validate_communication(comm)
def test_create_comm_ws():
    """
    At the default annotation level, a whitespace-only text is kept
    verbatim and the section list is empty.
    """
    text = '\t \t\r\n\n'
    comm = create_comm('one', text)
    assert comm.id == 'one'
    assert comm.text == text
    assert comm.sectionList == []
    assert validate_communication(comm)
def test_create_comm_one_sentence_al_none():
    """
    With AL_NONE, a one-line text is kept verbatim and the communication
    has no section list.
    """
    raw_text = 'simple comm\t\t.'
    comm = create_comm('one', raw_text, annotation_level=AL_NONE)
    assert comm.id == 'one'
    assert comm.text == raw_text
    assert comm.sectionList is None
    assert validate_communication(comm)
Beispiel #45
0
 def setUp(self):
     """Create three communications and store them on the test case."""
     self.comm1, self.comm2, self.comm3 = (
         create_comm(comm_id)
         for comm_id in ('comm-1', 'comm-2', 'comm-3')
     )
Beispiel #46
0
def test_create_comm_empty():
    """
    With no text argument and the default annotation level, the
    communication has empty text and an empty section list.
    """
    empty_comm = create_comm('one')
    assert empty_comm.id == 'one'
    assert empty_comm.text == ''
    assert empty_comm.sectionList == []
    assert validate_communication(empty_comm)
Beispiel #47
0
def test_create_comm_ws():
    """
    At the default annotation level, a whitespace-only text is kept
    verbatim and the section list is empty.
    """
    whitespace = '\t \t\r\n\n'
    comm = create_comm('one', whitespace)
    assert comm.id == 'one'
    assert comm.text == whitespace
    assert comm.sectionList == []
    assert validate_communication(comm)