def test_CommunicationWriterTGZ_single_file_default_name(output_file,
                                                         login_info):
    """Write one Communication to a TGZ archive without naming the member.

    Drives the writer through its explicit ``open``/``close`` calls, then
    checks that the archive holds exactly one regular file named
    ``comm.id + '.concrete'`` whose size, mode, mtime and ownership fields
    match the expected values.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    writer = CommunicationWriterTGZ()
    try:
        writer.open(output_file)
        writer.write(comm)
    finally:
        writer.close()

    assert tarfile.is_tarfile(output_file)

    # Open the archive in a ``with`` block so the handle is released even
    # when one of the assertions below fails.
    with tarfile.open(output_file) as f:
        tarinfo = f.next()
        assert tarinfo is not None

        assert comm.id + '.concrete' == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        assert 0o644 == tarinfo.mode
        assert login_info['uid'] == tarinfo.uid
        assert login_info['username'] == tarinfo.uname
        assert login_info['gid'] == tarinfo.gid
        assert login_info['groupname'] == tarinfo.gname

        # The archive must contain no second member.
        assert f.next() is None
def test_CommunicationWriterTar_single_file_fixed_point_unicode(output_file,
                                                                login_info):
    """Round-trip the les-deux-chandeliers test file through a tar archive.

    Reads the Communication, writes it to a tar archive under an explicit
    member name, and checks that the single archived member is
    byte-identical to the input file.
    """
    source_path = 'tests/testdata/les-deux-chandeliers.concrete'
    comm = read_communication_from_file(source_path)

    with CommunicationWriterTar(output_file) as writer:
        writer.write(comm, "les-deux-chandeliers.concrete")

    assert tarfile.is_tarfile(output_file)

    with open(source_path, 'rb') as expected_f:
        expected_data = expected_f.read()

    # Open the archive in a ``with`` block so the handle is released even
    # when one of the assertions below fails.
    with tarfile.open(output_file) as f:
        tarinfo = f.next()
        assert tarinfo is not None
        assert "les-deux-chandeliers.concrete" == tarinfo.name

        actual_data = f.extractfile(tarinfo).read()
        assert expected_data == actual_data

        # The archive must contain no second member.
        assert f.next() is None
def test_CommunicationWriterTar_single_file_ctx_mgr(output_file, login_info):
    """Write one Communication to a tar archive via the context manager.

    Checks that the archive holds exactly one regular file with the
    requested member name and that its size, mode, mtime and ownership
    fields match the expected values.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    with CommunicationWriterTar(output_file) as writer:
        writer.write(comm, "simple_1.concrete")

    assert tarfile.is_tarfile(output_file)

    # Open the archive in a ``with`` block so the handle is released even
    # when one of the assertions below fails.
    with tarfile.open(output_file) as f:
        tarinfo = f.next()
        assert tarinfo is not None

        assert "simple_1.concrete" == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        assert 0o644 == tarinfo.mode
        assert login_info['uid'] == tarinfo.uid
        assert login_info['username'] == tarinfo.uname
        assert login_info['gid'] == tarinfo.gid
        assert login_info['groupname'] == tarinfo.gname

        # The archive must contain no second member.
        assert f.next() is None
def communication_file_to_json(communication_filename):
    """
    Deserialize the Communication stored in `communication_filename`
    and return a JSON string describing it, as produced by
    `communication_to_json`.
    """
    return communication_to_json(
        read_communication_from_file(communication_filename))
def add_chunks_to_file(in_file, out_file, chunklink, fail_on_error):
    '''Reads a Communication file, adds chunking information, and writes
    a new Communication file containing the annotated version.

    Args:
        in_file: path of the Communication file to read
        out_file: path the annotated Communication is written to
        chunklink: passed through unchanged to `add_chunks_to_comm`
        fail_on_error: passed through unchanged to `add_chunks_to_comm`
    '''
    # Deserialize
    comm = read_communication_from_file(in_file)

    # Add chunks
    num_chunked, num_sents = add_chunks_to_comm(comm, chunklink,
                                                fail_on_error)

    # A Communication with no sentences would otherwise raise
    # ZeroDivisionError here and the output file would never be written.
    if num_sents:
        ratio = float(num_chunked) / float(num_sents)
    else:
        ratio = 0.0
    logging.info("Chunked %d / %d = %f", num_chunked, num_sents, ratio)

    # Serialize
    write_communication_to_file(comm, out_file)
def test_CommunicationWriter_gz_fixed_point_unicode(output_file):
    """Gzip round-trip of the les-deux-chandeliers test file.

    The decompressed writer output must be byte-identical to the input
    file the Communication was read from.
    """
    source_path = 'tests/testdata/les-deux-chandeliers.concrete'
    communication = read_communication_from_file(source_path)

    with CommunicationWriter(output_file, gzip=True) as gz_writer:
        gz_writer.write(communication)

    with open(source_path, 'rb') as source_stream:
        original_bytes = source_stream.read()
    with gzip.open(output_file, 'rb') as written_stream:
        roundtrip_bytes = written_stream.read()

    assert original_bytes == roundtrip_bytes
def test_CommunicationWriter_fixed_point_ctx_mgr(output_file):
    """Uncompressed round-trip through the writer's context manager.

    The bytes written to `output_file` must be identical to the bytes of
    the input file the Communication was read from.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    communication = read_communication_from_file(source_path)

    with CommunicationWriter(output_file) as plain_writer:
        plain_writer.write(communication)

    with open(source_path, 'rb') as source_stream:
        original_bytes = source_stream.read()
    with open(output_file, 'rb') as written_stream:
        roundtrip_bytes = written_stream.read()

    assert original_bytes == roundtrip_bytes
def main():
    """Command-line entry point: render a Concrete file as JSON.

    Parses the command line, configures logging, converts the input file
    to a JSON string using the selected protocol, and either prints the
    result (when the output path is '-') or writes it to the output path
    as UTF-8.

    Raises NotImplementedError for the TJSONProtocol / tokenlattice
    combination.
    """
    set_stdout_encoding()

    arg_parser = argparse.ArgumentParser(
        description="Pretty Print a Concrete file")
    arg_parser.add_argument(
        '--concrete_type',
        default='communication',
        choices=['communication', 'tokenlattice'],
        help='Default: communication')
    arg_parser.add_argument(
        '--protocol',
        default='simple',
        choices=['simple', 'TJSONProtocol'],
        help='Default: simple')
    arg_parser.add_argument(
        '--remove-timestamps',
        action='store_true',
        help="Removes timestamps from JSON output")
    arg_parser.add_argument(
        '--remove-uuids',
        action='store_true',
        help="Removes UUIDs from JSON output")
    arg_parser.add_argument(
        '-l', '--loglevel', '--log-level',
        help='Logging verbosity level threshold (to stderr)',
        default='info')
    arg_parser.add_argument(
        'concrete_file',
        help='path to input concrete communication file')
    arg_parser.add_argument(
        'json_file',
        nargs='?',
        default='-',
        help='path to output json file')
    concrete.version.add_argparse_argument(arg_parser)
    opts = arg_parser.parse_args()

    logging.basicConfig(
        format='%(asctime)-15s %(levelname)s: %(message)s',
        level=opts.loglevel.upper())

    simple_protocol = opts.protocol == 'simple'
    wants_communication = opts.concrete_type == 'communication'

    # Four protocol/type combinations, flattened into one chain.
    if simple_protocol and wants_communication:
        rendered = communication_file_to_json(
            opts.concrete_file,
            remove_timestamps=opts.remove_timestamps,
            remove_uuids=opts.remove_uuids
        )
    elif simple_protocol:
        rendered = tokenlattice_file_to_json(opts.concrete_file)
    elif wants_communication:
        comm = read_communication_from_file(opts.concrete_file)
        serialized = TSerialization.serialize(
            comm, TJSONProtocol.TJSONProtocolFactory())
        rendered = serialized.decode('utf-8')
    else:
        raise NotImplementedError

    # '-' selects standard output instead of a file.
    if opts.json_file == '-':
        print(rendered)
        return

    with codecs.open(opts.json_file, 'w', encoding='utf-8') as out_stream:
        out_stream.write(rendered)
def test_CommunicationWriterZip_single_file_ctx_mgr(output_file, login_info):
    """Write one Communication to a zip archive via the context manager.

    Checks that the archive holds exactly one entry with the requested
    name, a recent timestamp, and the same size as the input file.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    with CommunicationWriterZip(output_file) as writer:
        writer.write(comm, "simple_1.concrete")

    assert zipfile.is_zipfile(output_file)

    # Open the archive in a ``with`` block so the handle is released even
    # when one of the assertions below fails.
    with zipfile.ZipFile(output_file) as f:
        # Unpacking into a one-element list asserts a single entry.
        [zipinfo] = f.infolist()

        assert "simple_1.concrete" == zipinfo.filename
        assert timegm(zipinfo.date_time) > timegm(localtime()) - TIME_MARGIN
        assert os.stat(source_path).st_size == zipinfo.file_size
def test_CommunicationWriter_gz_fixed_point(output_file):
    """Gzip round-trip using the writer's explicit open/close calls.

    The decompressed writer output must be byte-identical to the input
    file the Communication was read from.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    communication = read_communication_from_file(source_path)

    gz_writer = CommunicationWriter(gzip=True)
    try:
        gz_writer.open(output_file)
        gz_writer.write(communication)
    finally:
        gz_writer.close()

    with open(source_path, 'rb') as source_stream:
        original_bytes = source_stream.read()
    with gzip.open(output_file, 'rb') as written_stream:
        roundtrip_bytes = written_stream.read()

    assert original_bytes == roundtrip_bytes
def communication_file_to_json(communication_filename,
                               remove_timestamps=False,
                               remove_uuids=False):
    """Get a "pretty-printed" JSON string representation for a Communication

    Args:

    - `communication_filename`: String specifying Communication filename
    - `remove_timestamps`: Boolean flag forwarded unchanged to
      `thrift_to_json`
    - `remove_uuids`: Boolean flag indicating if Concrete UUIDs should be
      removed; forwarded unchanged to `thrift_to_json`

    Returns:

    - A string containing a "pretty-printed" JSON representation of the
      Communication
    """
    loaded_comm = read_communication_from_file(communication_filename)
    json_text = thrift_to_json(loaded_comm,
                               remove_timestamps=remove_timestamps,
                               remove_uuids=remove_uuids)
    return json_text
def main():
    """Command-line entry point: render a Concrete file as JSON.

    Parses the command line, converts the input file to a JSON string
    using the selected protocol, and either prints the result (when the
    output path is 'STDOUT') or writes it to the output path as UTF-8.

    Raises NotImplementedError for the TJSONProtocol / tokenlattice
    combination.
    """
    # Make stdout output UTF-8, preventing "'ascii' codec can't encode" errors
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(
        description="Pretty Print a Concrete file")
    parser.add_argument('--concrete_type', default='communication',
                        choices=['communication', 'tokenlattice'],
                        help='Default: communication')
    parser.add_argument('--protocol', default='simple',
                        choices=['simple', 'TJSONProtocol'],
                        help='Default: simple')
    parser.add_argument('--remove-timestamps', action='store_true',
                        help="Removes timestamps from JSON output")
    parser.add_argument('--remove-uuids', action='store_true',
                        help="Removes UUIDs from JSON output")
    parser.add_argument('concrete_file')
    parser.add_argument('json_file', nargs='?', default='STDOUT')
    concrete.version.add_argparse_argument(parser)
    args = parser.parse_args()

    if args.protocol == 'simple':
        if args.concrete_type == 'communication':
            json_communication = communication_file_to_json(
                args.concrete_file,
                remove_timestamps=args.remove_timestamps,
                remove_uuids=args.remove_uuids
            )
        else:
            json_communication = tokenlattice_file_to_json(
                args.concrete_file)
    else:
        if args.concrete_type == 'communication':
            comm = read_communication_from_file(args.concrete_file)
            # NOTE(review): serialize() presumably returns an encoded byte
            # string here; confirm it passes cleanly through the UTF-8
            # stdout writer and the codecs file below.
            json_communication = TSerialization.serialize(
                comm, TJSONProtocol.TJSONProtocolFactory())
        else:
            raise NotImplementedError

    if args.json_file == 'STDOUT':
        # Parenthesised form prints the same single value under the
        # Python 2 print statement and also parses as a function call.
        print(json_communication)
    else:
        # ``with`` closes the output file even if the write raises.
        with codecs.open(args.json_file, "w", encoding="utf-8") as f:
            f.write(json_communication)
def test_CommunicationWriterZip_single_file_fixed_point(output_file,
                                                        login_info):
    """Round-trip simple_1.concrete through a zip archive.

    Reads the Communication, writes it to a zip archive under an explicit
    entry name, and checks that the single archived entry is
    byte-identical to the input file.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    with CommunicationWriterZip(output_file) as writer:
        writer.write(comm, "simple_1.concrete")

    assert zipfile.is_zipfile(output_file)

    with open(source_path, 'rb') as expected_f:
        expected_data = expected_f.read()

    # Both the archive and the member stream are opened in ``with`` blocks
    # so they are released even when an assertion below fails.
    with zipfile.ZipFile(output_file) as f:
        # Unpacking into a one-element list asserts a single entry.
        [zipinfo] = f.infolist()
        assert "simple_1.concrete" == zipinfo.filename

        with f.open(zipinfo) as member:
            actual_data = member.read()
        assert expected_data == actual_data
def test_CommunicationWriterZip_single_file_default_name(output_file,
                                                         login_info):
    """Write one Communication to a zip archive without naming the entry.

    Drives the writer through its explicit ``open``/``close`` calls, then
    checks that the archive holds exactly one entry named
    ``comm.id + '.concrete'`` with a recent timestamp and the same size
    as the input file.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    writer = CommunicationWriterZip()
    try:
        writer.open(output_file)
        writer.write(comm)
    finally:
        writer.close()

    assert zipfile.is_zipfile(output_file)

    # Open the archive in a ``with`` block so the handle is released even
    # when one of the assertions below fails.
    with zipfile.ZipFile(output_file) as f:
        # Unpacking into a one-element list asserts a single entry.
        [zipinfo] = f.infolist()

        assert comm.id + '.concrete' == zipinfo.filename
        assert timegm(zipinfo.date_time) > timegm(localtime()) - TIME_MARGIN
        assert os.stat(source_path).st_size == zipinfo.file_size
def read_test_comm():
    """Return the Communication read from the serif_dog-bites-man file."""
    return read_communication_from_file(
        "tests/testdata/serif_dog-bites-man.concrete")