def test_append(self):
    """Append-mode round trip: write one datum, re-open the file in append
    mode nine more times, then verify that reading back yields exactly ten
    copies of the datum, for every (schema, datum) pair and every codec.
    """
    print('')
    print('TEST APPEND')
    print('===========')
    print('')
    correct = 0
    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in CODECS_TO_VALIDATE:
            print('')
            print('SCHEMA NUMBER %d' % (i + 1))
            print('================')
            print('')
            print('Schema: %s' % example_schema)
            print('Datum: %s' % datum)
            print('Codec: %s' % codec)

            # Write the datum once to create the file with a header.
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer,
                                          schema_object, codec=codec)
            dfw.append(datum)
            dfw.close()

            # Open, append, and close nine more times.
            # BUGFIX: use a throwaway loop variable; the original reused
            # `i` and clobbered the outer schema index.
            for _ in range(9):
                writer = open(FILENAME, 'ab+')
                dfw = datafile.DataFileWriter(writer, io.DatumWriter())
                dfw.append(datum)
                dfw.close()

            # Read all appended data back in binary.
            # BUGFIX: the original iterated `for datum in dfr`, overwriting
            # the datum under test before the equality check below.
            reader = open(FILENAME, 'rb')
            datum_reader = io.DatumReader()
            dfr = datafile.DataFileReader(reader, datum_reader)
            appended_data = list(dfr)

            print('Appended Data: %s' % appended_data)
            print('Appended Data Length: %d' % len(appended_data))
            is_correct = [datum] * 10 == appended_data
            if is_correct:
                correct += 1
            print('Correct Appended: %s' % is_correct)
            print('')

    os.remove(FILENAME)
    self.assertEqual(correct,
                     len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
def test_unknown_symbol(self):
    """Resolving an enum value whose symbol is missing from the reader's
    schema must raise SchemaResolutionException.

    Writer knows FOO/BAR, reader only BAR/BAZ; the written 'FOO' cannot
    be resolved on read.
    """
    print_name('TEST UNKNOWN SYMBOL')
    writers_schema = schema.parse("""\
{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}""")
    datum_to_write = 'FOO'
    readers_schema = schema.parse("""\
{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}""")

    # Encoding with the writer's schema succeeds; the failure must come
    # from the read/resolution step.
    writer, encoder, datum_writer = write_datum(datum_to_write,
                                                writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)
    with self.assertRaises(io.SchemaResolutionException):
        # BUGFIX: the original never invoked read(), so no exception was
        # ever raised and assertRaises always failed.
        datum_reader.read(decoder)
def __init__(self, writer, datum_writer, writers_schema=None, codec='null'):
    """
    If the schema is not present, presume we're appending.

    @param writer: File-like object to write into.
    @param datum_writer: DatumWriter used to serialize each datum.
    @param writers_schema: Schema for new files; None means append to an
        existing Avro file, recovering schema/codec from its header.
    @param codec: Compression codec name; must be in VALID_CODECS.
    @raise DataFileException: on an unknown codec, or when appending to a
        writer that is not opened read/write.
    """
    self._writer = writer
    self._encoder = io.BinaryEncoder(writer)
    self._datum_writer = datum_writer
    # Blocks are staged in a buffer so they can be sized/compressed before
    # being flushed to the real writer.
    self._buffer_writer = StringIO()
    self._buffer_encoder = io.BinaryEncoder(self._buffer_writer)
    self._block_count = 0
    self._meta = {}
    self._header_written = False

    if writers_schema is not None:
        # New file: generate a sync marker and record codec/schema metadata.
        if codec not in VALID_CODECS:
            raise DataFileException("Unknown codec: %r" % codec)
        self._sync_marker = DataFileWriter.generate_sync_marker()
        self.set_meta('avro.codec', codec)
        self.set_meta('avro.schema', str(writers_schema))
        self.datum_writer.writers_schema = writers_schema
    else:
        # Appending: the writer must be readable too, so we can recover
        # the existing header.
        # BUGFIX: not every file-like object exposes a `mode` attribute
        # (e.g. io.BytesIO); the original `writer.mode` raised
        # AttributeError before the readable()/writable() fallback could
        # run. Use getattr so such objects take the fallback branch.
        if getattr(writer, 'mode', None):
            if writer.mode not in ('rb+', 'ab+'):
                raise DataFileException("When appending records to an Avro data file, the file object passed into DataFileWriter must be opened in read/write mode, e.g. for files: \"rb+\" or \"ab+\"")
        else:
            if not (writer.readable() and writer.writable()):
                raise DataFileException("When appending records to an Avro data file, the file object passed into DataFileWriter must be opened in read/write mode, e.g. for files: \"rb+\" or \"ab+\"")

        # Open writer for reading to collect the existing file's metadata.
        dfr = DataFileReader(writer, io.DatumReader())
        # TODO(hammer): collect arbitrary metadata
        self._sync_marker = dfr.sync_marker
        self.set_meta('avro.codec', dfr.get_meta('avro.codec'))

        # Reuse the schema the existing file was written with.
        schema_from_file = dfr.get_meta('avro.schema')
        self.set_meta('avro.schema', schema_from_file)
        self.datum_writer.writers_schema = schema.parse(schema_from_file)

        # Seek to the end of the file and prepare for writing.
        writer.seek(0, 2)
        self._header_written = True
def test_round_trip(self):
    """Write each validation datum to a file 10 times and verify the read
    path returns exactly those 10 copies, for every codec.
    """
    print('')
    print('TEST ROUND TRIP')
    print('===============')
    print('')
    correct = 0
    print(SCHEMAS_TO_VALIDATE)
    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in CODECS_TO_VALIDATE:
            print('')
            print('SCHEMA NUMBER %d' % (i + 1))
            print('================')
            print('')
            print('Schema: %s' % example_schema)
            print('Datum: %s' % datum)
            print('Codec: %s' % codec)

            # Write the datum to the file 10 times.
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer,
                                          schema_object, codec=codec)
            for datum_counter in range(10):
                dfw.append(datum)
            dfw.close()

            # Read the data back in binary.
            reader = open(FILENAME, 'rb')
            datum_reader = io.DatumReader()
            dfr = datafile.DataFileReader(reader, datum_reader)
            round_trip_data = []
            for read_datum in dfr:
                round_trip_data.append(read_datum)

            print('Round Trip Data: %s' % round_trip_data)
            print('Round Trip Data Length: %d' % len(round_trip_data))
            is_correct = [datum] * 10 == round_trip_data
            if is_correct:
                correct += 1
            print('Correct Round Trip: %s' % is_correct)
            print('')

    os.remove(FILENAME)
    # BUGFIX: assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(correct,
                     len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
def test_metadata(self):
    """Verify user metadata set via set_meta() on the writer is readable
    via get_meta() on the reader, using 'with' statements for both.
    """
    # Test the writer with a 'with' statement.
    writer = open(FILENAME, 'wb')
    datum_writer = io.DatumWriter()
    sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
    schema_object = schema.parse(sample_schema)
    with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
        dfw.set_meta('test.string', 'foo')
        dfw.set_meta('test.number', '1')
        dfw.append(sample_datum)
    # Exiting the writer's context must close the underlying file.
    self.assertTrue(writer.closed)

    # Test the reader with a 'with' statement.
    datums = []
    reader = open(FILENAME, 'rb')
    datum_reader = io.DatumReader()
    with datafile.DataFileReader(reader, datum_reader) as dfr:
        # BUGFIX: assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual('foo', dfr.get_meta('test.string'))
        self.assertEqual('1', dfr.get_meta('test.number'))
        for datum in dfr:
            datums.append(datum)
    self.assertTrue(reader.closed)
def test_context_manager(self):
    """Check that DataFileWriter and DataFileReader work as context
    managers, closing their underlying file objects on exit.
    """
    # Context manager was introduced as a first class
    # member only in Python 2.6 and above.
    import sys
    if sys.version_info < (2, 6):
        print('Skipping context manager tests on this Python version.')
        return

    # Writer side: appending inside 'with' must close the file afterwards.
    out_file = open(FILENAME, 'wb')
    sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
    parsed_schema = schema.parse(sample_schema)
    with datafile.DataFileWriter(out_file, io.DatumWriter(),
                                 parsed_schema) as dfw:
        dfw.append(sample_datum)
    self.assertTrue(out_file.closed)

    # Reader side: iterating inside 'with' must close the file afterwards.
    in_file = open(FILENAME, 'rb')
    with datafile.DataFileReader(in_file, io.DatumReader()) as dfr:
        datums = [d for d in dfr]
    self.assertTrue(in_file.closed)
def read_datum(buffer, writers_schema, readers_schema=None):
    """Decode and return a single datum from *buffer*'s contents,
    resolving the writer's schema against the reader's (if given).
    """
    decoder = io.BinaryDecoder(StringIO(buffer.getvalue()))
    return io.DatumReader(writers_schema, readers_schema).read(decoder)
def read_request(self, writers_schema, readers_schema, decoder):
    """Deserialize and return a request datum from *decoder*."""
    return io.DatumReader(writers_schema, readers_schema).read(decoder)
def read_error(self, writers_schema, readers_schema, decoder):
    """Deserialize an error datum from *decoder* and wrap it in an
    AvroRemoteException for the caller to raise or inspect.
    """
    error_datum = io.DatumReader(writers_schema, readers_schema).read(decoder)
    return AvroRemoteException(error_datum)
def read_response(self, writers_schema, readers_schema, decoder):
    """Deserialize and return a call response datum from *decoder*."""
    return io.DatumReader(writers_schema, readers_schema).read(decoder)
# Handshake schema is pulled in during build with open('./ipc/HandshakeRequest.avsc', 'rb') as handshake_request: HANDSHAKE_REQUEST_SCHEMA = schema.parse( handshake_request.read().decode('utf-8')) # """ # """) with open('./ipc/HandshakeResponse.avsc', 'rb') as handshake_response: HANDSHAKE_RESPONSE_SCHEMA = schema.parse( handshake_response.read().decode('utf-8')) # HANDSHAKE_RESPONSE_SCHEMA = schema.parse(""" # @HANDSHAKE_RESPONSE_SCHEMA@ # """) HANDSHAKE_REQUESTOR_WRITER = io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA) HANDSHAKE_REQUESTOR_READER = io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA) HANDSHAKE_RESPONDER_WRITER = io.DatumWriter(HANDSHAKE_RESPONSE_SCHEMA) HANDSHAKE_RESPONDER_READER = io.DatumReader(HANDSHAKE_REQUEST_SCHEMA) META_SCHEMA = schema.parse('{"type": "map", "values": "bytes"}') META_WRITER = io.DatumWriter(META_SCHEMA) META_READER = io.DatumReader(META_SCHEMA) SYSTEM_ERROR_SCHEMA = schema.parse('["string"]') # protocol cache REMOTE_HASHES = {} REMOTE_PROTOCOLS = {} BIG_ENDIAN_INT_STRUCT = io.struct_class('!I') BUFFER_HEADER_LENGTH = 4