예제 #1
0
    def test_append(self):
        """Check that reopening a data file in append mode adds records.

        For every (schema, datum) pair in SCHEMAS_TO_VALIDATE and every codec
        in CODECS_TO_VALIDATE, the datum is written once to a fresh file, the
        file is reopened nine times in append mode with one more datum written
        each time, and the file is then read back. The combination counts as
        correct when the ten records read equal ten copies of the datum that
        was written.
        """
        print('')
        print('TEST APPEND')
        print('===========')
        print('')
        correct = 0
        for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
            for codec in CODECS_TO_VALIDATE:
                print('')
                print('SCHEMA NUMBER %d' % (i + 1))
                print('================')
                print('')
                print('Schema: %s' % example_schema)
                print('Datum: %s' % datum)
                print('Codec: %s' % codec)

                # write data in binary to file once
                writer = open(FILENAME, 'wb')
                datum_writer = io.DatumWriter()
                schema_object = schema.parse(example_schema)
                dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
                dfw.append(datum)
                dfw.close()

                # open file, write, and close nine times
                # (throwaway loop variable so the schema index ``i`` survives)
                for _ in range(9):
                    writer = open(FILENAME, 'ab+')
                    dfw = datafile.DataFileWriter(writer, io.DatumWriter())
                    dfw.append(datum)
                    dfw.close()

                # read data in binary from file; the ``with`` block closes the
                # reader before the next iteration reopens or removes the file
                reader = open(FILENAME, 'rb')
                datum_reader = io.DatumReader()
                with datafile.DataFileReader(reader, datum_reader) as dfr:
                    # A separate name keeps the written ``datum`` intact, so
                    # the comparison below is against what was written.
                    appended_data = [appended_datum for appended_datum in dfr]

                print('Appended Data: %s' % appended_data)
                print('Appended Data Length: %d' % len(appended_data))
                is_correct = [datum] * 10 == appended_data
                if is_correct:
                    correct += 1
                print('Correct Appended: %s' % is_correct)
                print('')
        os.remove(FILENAME)
        self.assertEqual(correct, len(CODECS_TO_VALIDATE)*len(SCHEMAS_TO_VALIDATE))
예제 #2
0
  def test_unknown_symbol(self):
    """Reading an enum symbol missing from the reader's schema must fail.

    'FOO' is written with a writer's schema whose symbols are FOO and BAR,
    then read with a reader's schema whose symbols are BAR and BAZ. The read
    is expected to raise io.SchemaResolutionException.
    """
    print_name('TEST UNKNOWN SYMBOL')
    writers_schema = schema.parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["FOO", "BAR"]}""")
    datum_to_write = 'FOO'

    readers_schema = schema.parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["BAR", "BAZ"]}""")

    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)
    # Only the read itself is expected to raise; keeping the setup outside the
    # ``with`` block stops an unrelated failure from satisfying the assertion.
    with self.assertRaises(io.SchemaResolutionException):
      datum_reader.read(decoder)
예제 #3
0
    def __init__(self, writer, datum_writer, writers_schema=None, codec='null'):
        """
        Prepare to write a new data file or to append to an existing one.

        If the schema is not present, presume we're appending: the sync
        marker, codec and schema are read back from the existing file through
        a DataFileReader, the writer is moved to the end of the file, and the
        header is treated as already written.

        @param writer: File-like object to write into. When appending it must
                       be open for both reading and writing.
        @param datum_writer: Datum writer stored on the instance.
        @param writers_schema: Schema for a new file, or None to append.
        @param codec: Codec name for a new file; must be in VALID_CODECS.
                      Not consulted when appending.
        @raise DataFileException: If the codec is unknown, or if appending to
                                  a writer not opened in read/write mode.
        """
        self._writer = writer
        self._encoder = io.BinaryEncoder(writer)
        self._datum_writer = datum_writer
        self._buffer_writer = StringIO()
        self._buffer_encoder = io.BinaryEncoder(self._buffer_writer)
        self._block_count = 0
        self._meta = {}
        self._header_written = False

        if writers_schema is not None:
            if codec not in VALID_CODECS:
                raise DataFileException("Unknown codec: %r" % codec)
            self._sync_marker = DataFileWriter.generate_sync_marker()
            self.set_meta('avro.codec', codec)
            self.set_meta('avro.schema', str(writers_schema))
            # NOTE(review): self.datum_writer is presumably a property exposing
            # self._datum_writer; it is defined outside this method.
            self.datum_writer.writers_schema = writers_schema
        else:
            append_mode_error = "When appending records to an Avro data file, the file object passed into DataFileWriter must be opened in read/write mode, e.g. for files: \"rb+\" or \"ab+\""
            # Not every file-like object has a ``mode`` attribute; use getattr
            # so such objects reach the readable()/writable() check instead of
            # failing with AttributeError.
            mode = getattr(writer, 'mode', None)
            if mode:
                if mode not in ('rb+', 'ab+'):
                    raise DataFileException(append_mode_error)
            elif not (writer.readable() and writer.writable()):
                raise DataFileException(append_mode_error)
            # open writer for reading to collect metadata
            dfr = DataFileReader(writer, io.DatumReader())

            # TODO(hammer): collect arbitrary metadata
            # collect metadata
            self._sync_marker = dfr.sync_marker
            self.set_meta('avro.codec', dfr.get_meta('avro.codec'))

            # get schema used to write existing file
            schema_from_file = dfr.get_meta('avro.schema')
            self.set_meta('avro.schema', schema_from_file)
            self.datum_writer.writers_schema = schema.parse(schema_from_file)

            # seek to the end of the file and prepare for writing
            writer.seek(0, 2)
            self._header_written = True
예제 #4
0
    def test_round_trip(self):
        """Check that ten records written to a data file read back unchanged.

        For every (schema, datum) pair in SCHEMAS_TO_VALIDATE and every codec
        in CODECS_TO_VALIDATE, the datum is written ten times to a fresh file
        and the file is read back. The combination counts as correct when the
        records read equal ten copies of the datum.
        """
        print('')
        print('TEST ROUND TRIP')
        print('===============')
        print('')
        correct = 0
        print(SCHEMAS_TO_VALIDATE)
        for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
            for codec in CODECS_TO_VALIDATE:
                print('')
                print('SCHEMA NUMBER %d' % (i + 1))
                print('================')
                print('')
                print('Schema: %s' % example_schema)
                print('Datum: %s' % datum)
                print('Codec: %s' % codec)

                # write data in binary to file 10 times
                writer = open(FILENAME, 'wb')
                datum_writer = io.DatumWriter()
                schema_object = schema.parse(example_schema)
                dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
                for _ in range(10):
                    dfw.append(datum)
                dfw.close()

                # read data in binary from file; the ``with`` block closes the
                # reader before the file is rewritten or removed
                reader = open(FILENAME, 'rb')
                datum_reader = io.DatumReader()
                with datafile.DataFileReader(reader, datum_reader) as dfr:
                    round_trip_data = [read_datum for read_datum in dfr]

                print('Round Trip Data: %s' % round_trip_data)
                print('Round Trip Data Length: %d' % len(round_trip_data))
                is_correct = [datum] * 10 == round_trip_data
                if is_correct:
                    correct += 1
                print('Correct Round Trip: %s' % is_correct)
                print('')
        os.remove(FILENAME)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(correct, len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
예제 #5
0
    def test_metadata(self):
        """Check that metadata set on the writer is readable from the file.

        Two metadata entries are set while writing one sample datum; the file
        is then reopened and both entries are read back and compared. Writer
        and reader are each used as context managers, and the underlying file
        objects are asserted to be closed afterwards.
        """
        # Test the writer with a 'with' statement.
        writer = open(FILENAME, 'wb')
        datum_writer = io.DatumWriter()
        sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
        schema_object = schema.parse(sample_schema)
        with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
            dfw.set_meta('test.string', 'foo')
            dfw.set_meta('test.number', '1')
            dfw.append(sample_datum)
        self.assertTrue(writer.closed)

        # Test the reader with a 'with' statement.
        datums = []
        reader = open(FILENAME, 'rb')
        datum_reader = io.DatumReader()
        with datafile.DataFileReader(reader, datum_reader) as dfr:
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual('foo', dfr.get_meta('test.string'))
            self.assertEqual('1', dfr.get_meta('test.number'))
            for datum in dfr:
                datums.append(datum)
        self.assertTrue(reader.closed)
예제 #6
0
    def test_context_manager(self):
        """Use the data file writer and reader as context managers.

        One sample datum is written inside a ``with`` block, then the file is
        read back inside another; after each block the underlying file object
        is asserted to be closed.
        """
        # Context managers only became a first-class language feature in
        # Python 2.6, so bail out early on anything older.
        import sys
        if not sys.version_info >= (2, 6):
            print('Skipping context manager tests on this Python version.')
            return

        # Writer side: leaving the 'with' block must close the file.
        schema_text, record = SCHEMAS_TO_VALIDATE[1]
        out_file = open(FILENAME, 'wb')
        record_writer = io.DatumWriter()
        parsed_schema = schema.parse(schema_text)
        with datafile.DataFileWriter(out_file, record_writer, parsed_schema) as file_writer:
            file_writer.append(record)
        self.assertTrue(out_file.closed)

        # Reader side: leaving the 'with' block must close the file.
        in_file = open(FILENAME, 'rb')
        record_reader = io.DatumReader()
        with datafile.DataFileReader(in_file, record_reader) as file_reader:
            datums = [item for item in file_reader]
        self.assertTrue(in_file.closed)
예제 #7
0
def read_datum(buffer, writers_schema, readers_schema=None):
    """Decode one datum from the bytes held in ``buffer``.

    The contents of ``buffer`` (taken via ``getvalue()``) are wrapped in a
    fresh StringIO and a BinaryDecoder, then read with a DatumReader built
    from the writer's schema and the optional reader's schema.
    """
    decoder = io.BinaryDecoder(StringIO(buffer.getvalue()))
    return io.DatumReader(writers_schema, readers_schema).read(decoder)
예제 #8
0
 def read_request(self, writers_schema, readers_schema, decoder):
     """Read a request datum from ``decoder`` using the two given schemas."""
     return io.DatumReader(writers_schema, readers_schema).read(decoder)
예제 #9
0
 def read_error(self, writers_schema, readers_schema, decoder):
     """Read an error datum from ``decoder`` and wrap it.

     The decoded value is returned (not raised) inside an AvroRemoteException.
     """
     error_datum = io.DatumReader(writers_schema, readers_schema).read(decoder)
     return AvroRemoteException(error_datum)
예제 #10
0
 def read_response(self, writers_schema, readers_schema, decoder):
     """Read a response datum from ``decoder`` using the two given schemas."""
     return io.DatumReader(writers_schema, readers_schema).read(decoder)
예제 #11
0
# Handshake schemas are pulled in during build and parsed once at import time.
# NOTE(review): both paths are relative, so they resolve against the process's
# current working directory; importing from elsewhere will fail to find them.
with open('./ipc/HandshakeRequest.avsc', 'rb') as handshake_request:
    HANDSHAKE_REQUEST_SCHEMA = schema.parse(
        handshake_request.read().decode('utf-8'))
# The .avsc files are read as bytes and decoded as UTF-8 before parsing.
with open('./ipc/HandshakeResponse.avsc', 'rb') as handshake_response:
    HANDSHAKE_RESPONSE_SCHEMA = schema.parse(
        handshake_response.read().decode('utf-8'))

# Earlier form, kept for reference; presumably the build substituted the
# placeholder with the schema text (confirm before removing):
# HANDSHAKE_RESPONSE_SCHEMA = schema.parse("""
# @HANDSHAKE_RESPONSE_SCHEMA@
# """)

# Datum writers/readers for the handshake: the requestor writes requests and
# reads responses, the responder does the reverse.
HANDSHAKE_REQUESTOR_WRITER = io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA)
HANDSHAKE_REQUESTOR_READER = io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA)
HANDSHAKE_RESPONDER_WRITER = io.DatumWriter(HANDSHAKE_RESPONSE_SCHEMA)
HANDSHAKE_RESPONDER_READER = io.DatumReader(HANDSHAKE_REQUEST_SCHEMA)

# Metadata schema: a map whose values are bytes.
META_SCHEMA = schema.parse('{"type": "map", "values": "bytes"}')
META_WRITER = io.DatumWriter(META_SCHEMA)
META_READER = io.DatumReader(META_SCHEMA)

# Schema for system errors: a union whose only branch is string.
SYSTEM_ERROR_SCHEMA = schema.parse('["string"]')

# protocol cache
REMOTE_HASHES = {}
REMOTE_PROTOCOLS = {}

# '!I' is the struct format for a network-order (big-endian) unsigned int.
# NOTE(review): BUFFER_HEADER_LENGTH presumably is the packed size of that
# struct (4 bytes) — confirm against where the two are used together.
BIG_ENDIAN_INT_STRUCT = io.struct_class('!I')
BUFFER_HEADER_LENGTH = 4