def _read_block_header(self):
    """Read the header of the next block and prepare the datum decoder.

    Stores the block's object count in ``self.block_count`` and points
    ``self._datum_decoder`` at a decoder positioned on the block's
    (decompressed) payload, according to ``self.codec``.

    Raises:
        DataFileException: if ``self.codec`` is not one of the codecs
            handled here.
    """
    raw = self.raw_decoder
    self.block_count = raw.read_long()

    codec = self.codec
    if codec == "null":
        # The byte length that follows the count is not needed for
        # uncompressed blocks, so skip it and decode straight from the
        # raw stream.
        raw.skip_long()
        self._datum_decoder = self._raw_decoder
        return

    if codec == 'deflate':
        # Compressed data is stored as (length, data), which corresponds
        # to how the "bytes" type is encoded.
        # -15 is the log of the window size; negative indicates "raw"
        # (no zlib headers) decompression.  See zlib.h.
        payload = zlib.decompress(raw.read_bytes(), -15)
    elif codec == 'snappy':
        # Compressed data includes a 4-byte CRC32 checksum, so the
        # compressed payload is 4 bytes shorter than the stored length.
        compressed_size = raw.read_long() - 4
        payload = snappy.decompress(raw.read(compressed_size))
    elif codec == 'xz':
        # Same (length, data) "bytes" layout as deflate.
        payload = lzma.decompress(raw.read_bytes())
    else:
        raise DataFileException("Unknown codec: %r" % codec)

    self._datum_decoder = io.BinaryDecoder(StringIO(payload))
    if codec == 'snappy':
        # The checksum is verified only after the decoder is in place.
        raw.check_crc32(payload)
def check_skip_number(number_type):
    """Check that skipping an encoded number leaves the decoder aligned.

    For every entry in BINARY_ENCODINGS, the entry's value is written
    followed by a known sentinel; the first value is skipped with
    ``skip_long`` and the sentinel is read back.

    Args:
        number_type: schema type name; it is upper-cased for the banner
            and lower-cased to build the schema.

    Returns:
        The number of entries for which the sentinel was read back intact.
    """
    print_name('TEST SKIP %s' % number_type.upper())
    sentinel = 6253
    matches = 0
    for skipped, _encoding in BINARY_ENCODINGS:
        print('Value to Skip: %d' % skipped)

        # Write the value to skip, then the sentinel, into one buffer.
        number_schema = schema.parse('"%s"' % number_type.lower())
        buf, encoder, datum_writer = write_datum(skipped, number_schema)
        datum_writer.write(sentinel, encoder)

        # Step over the first value.
        decoder = io.BinaryDecoder(StringIO(buf.getvalue()))
        decoder.skip_long()

        # Whatever comes next should be the sentinel.
        actual = io.DatumReader(number_schema).read(decoder)
        print('Read Value: %d' % actual)
        if actual == sentinel:
            matches += 1
        print('')
    return matches
def _process_handshake(self, call_response, message_name, request_datum):
    """Process the handshake and call response contained in call_response.

    Args:
        call_response: serialized response; wrapped in a StringIO for
            decoding.
        message_name: name of the message that was called.
        request_datum: the original request, used if it must be re-sent.

    Returns:
        The result of ``read_call_response`` when the handshake reports
        that a call response is present, otherwise the result of calling
        ``self.request`` again with the same message and datum.
    """
    decoder = io.BinaryDecoder(StringIO(call_response))
    if not self.read_handshake_response(decoder):
        # No call response in this reply; presumably the handshake has to
        # be redone, so issue the request again.
        return self.request(message_name, request_datum)
    return self.read_call_response(message_name, decoder)
def issue_request(self, call_request, message_name, request_datum):
    """Send call_request through the transceiver and decode the reply.

    Args:
        call_request: serialized request handed to
            ``self.transceiver.transceive``.
        message_name: name of the message being called.
        request_datum: the original request, used if it must be re-sent.

    Returns:
        The result of ``read_call_response`` when the handshake reports
        that a call response is present, otherwise the result of calling
        ``self.request`` again with the same message and datum.
    """
    reply = self.transceiver.transceive(call_request)

    # Process the handshake first; it tells us whether a call response
    # follows in the same reply.
    decoder = io.BinaryDecoder(StringIO(reply))
    if not self.read_handshake_response(decoder):
        return self.request(message_name, request_datum)
    return self.read_call_response(message_name, decoder)
def test_no_default_value(self):
    # Reading LONG_RECORD_DATUM with a reader schema whose only field is
    # "H" (an int with no default) must raise SchemaResolutionException.
    # NOTE(review): presumably LONG_RECORD_SCHEMA has no field named "H";
    # confirm against its definition.
    print_name('TEST NO DEFAULT VALUE')
    writers_schema = LONG_RECORD_SCHEMA
    datum_to_write = LONG_RECORD_DATUM

    readers_schema = schema.parse("""\
      {"type": "record", "name": "Test",
       "fields": [{"name": "H", "type": "int"}]}""")

    # Setup stays outside the assertRaises block so that an unexpected
    # failure while writing cannot be mistaken for the expected error.
    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)

    # The read is the operation under test; without it nothing inside the
    # block attempts schema resolution and the assertion cannot succeed.
    with self.assertRaises(io.SchemaResolutionException):
        datum_reader.read(decoder)
def test_unknown_symbol(self):
    # Writing the enum symbol 'FOO' and reading it with a reader schema
    # whose symbols are only "BAR" and "BAZ" must raise
    # SchemaResolutionException.
    print_name('TEST UNKNOWN SYMBOL')
    writers_schema = schema.parse("""\
      {"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}""")
    datum_to_write = 'FOO'

    readers_schema = schema.parse("""\
      {"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}""")

    # Setup stays outside the assertRaises block so that an unexpected
    # failure while writing cannot be mistaken for the expected error.
    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)

    # The read is the operation under test; without it nothing inside the
    # block attempts schema resolution and the assertion cannot succeed.
    with self.assertRaises(io.SchemaResolutionException):
        datum_reader.read(decoder)
def input(self, data, count):
    """ Receive input from the server

    Parameters
    ------------------------------------------------------
    data  - Should contain the bytes encoding the serialized data
          - I think this gets represented as a string
    count - how many input records are provided in the binary stream

    Any exception raised while processing is logged with its traceback
    and reported through self.fail; it is not propagated to the caller.
    """
    try:
        # Wrap the serialized bytes so records can be decoded one by one.
        decoder = avio.BinaryDecoder(StringIO(data))

        for _ in range(count):
            if self.taskType == TaskType.MAP:
                in_record = self.inReader.read(decoder)

                # Do we need to pass midCollector if it's declared as an
                # instance variable?
                self.map(in_record, self.midCollector)

            elif self.taskType == TaskType.REDUCE:
                # store the previous record
                prev = self.midRecord

                # read the new record
                self.midRecord = self.midReader.read(decoder)
                if prev is not None and not keys_are_equal(
                        self.midRecord, prev, self._red_fkeys):
                    # since the key has changed we need to finalize the
                    # processing for this group of key,value pairs
                    self.reduceFlush(prev, self.outCollector)
                self.reduce(self.midRecord, self.outCollector)

    except Exception:
        # Deliberately broad: every failure is logged and handed to
        # self.fail instead of escaping from this method.
        estr = traceback.format_exc()
        self.log.warning("failing: " + estr)
        self.fail(estr)
def __init__(self, reader, datum_reader):
    """Set up a reader over an existing data file.

    Args:
        reader: the underlying stream; it is wrapped in a BinaryDecoder.
        datum_reader: the reader whose ``writers_schema`` is set from the
            schema stored in the file metadata.

    Raises:
        DataFileException: if the codec named in the metadata is not in
            VALID_CODECS.
    """
    self._reader = reader
    self._raw_decoder = io.BinaryDecoder(reader)
    self._datum_decoder = None  # Maybe reset at every block.
    self._datum_reader = datum_reader

    # The header (magic, meta, sync) has to be read before any metadata
    # lookup below.
    self._read_header()

    # A file without a codec entry is treated as uncompressed.
    declared_codec = self.get_meta(CODEC_KEY)
    self.codec = "null" if declared_codec is None else declared_codec
    if self.codec not in VALID_CODECS:
        raise DataFileException('Unknown codec: %s.' % self.codec)

    self._file_length = self.determine_file_length()

    # Nothing has been read from any block yet.
    self._block_count = 0
    schema_json = self.get_meta(SCHEMA_KEY)
    self.datum_reader.writers_schema = schema.parse(schema_json)
def read_datum(buffer, writers_schema, readers_schema=None):
    """Decode and return one datum from the contents of buffer.

    Args:
        buffer: object whose ``getvalue()`` yields the encoded data; its
            contents are copied into a fresh StringIO for reading.
        writers_schema: schema the data was written with.
        readers_schema: optional schema passed to the DatumReader
            alongside the writer's schema.

    Returns:
        Whatever ``DatumReader.read`` produces for the decoded data.
    """
    decoder = io.BinaryDecoder(StringIO(buffer.getvalue()))
    return io.DatumReader(writers_schema, readers_schema).read(decoder)
def respond(self, call_request):
    """
    Called by a server to deserialize a request, compute and serialize
    a response or error. Compare to 'handle()' in Thrift.

    call_request is the serialized request (handshake followed by the
    call). The return value is the serialized reply accumulated in the
    output buffer: the handshake response followed by either the call
    response, a declared error, or a system error.

    If a schema.AvroException is raised while handling the call, anything
    written after the handshake is discarded and replaced by a system
    error, so the caller receives a complete error response.
    """
    buffer_reader = StringIO(call_request)
    buffer_decoder = io.BinaryDecoder(buffer_reader)
    buffer_writer = StringIO()
    buffer_encoder = io.BinaryEncoder(buffer_writer)
    error = None
    response_metadata = {}
    # Offset in buffer_writer at which the call response starts; updated
    # once the handshake response has been written.
    response_start = buffer_writer.tell()

    try:
        remote_protocol = self.process_handshake(buffer_decoder,
                                                 buffer_encoder)
        # handshake failure
        if remote_protocol is None:
            return buffer_writer.getvalue()
        response_start = buffer_writer.tell()

        # read request using remote protocol; the request metadata is not
        # used, but it must be consumed to reach the message name
        META_READER.read(buffer_decoder)
        remote_message_name = buffer_decoder.read_utf8()

        # get remote and local request schemas so we can do
        # schema resolution (one fine day)
        remote_message = remote_protocol.messages.get(remote_message_name)
        if remote_message is None:
            fail_msg = 'Unknown remote message: %s' % remote_message_name
            raise schema.AvroException(fail_msg)
        local_message = self.local_protocol.messages.get(
            remote_message_name)
        if local_message is None:
            fail_msg = 'Unknown local message: %s' % remote_message_name
            raise schema.AvroException(fail_msg)
        writers_schema = remote_message.request
        readers_schema = local_message.request
        request = self.read_request(writers_schema, readers_schema,
                                    buffer_decoder)

        # perform server logic
        try:
            response = self.invoke(local_message, request)
        except AvroRemoteException as e:
            error = e
        except Exception as e:
            error = AvroRemoteException(str(e))

        # write response using local protocol
        META_WRITER.write(response_metadata, buffer_encoder)
        buffer_encoder.write_boolean(error is not None)
        if error is None:
            writers_schema = local_message.response
            self.write_response(writers_schema, response, buffer_encoder)
        else:
            writers_schema = local_message.errors
            self.write_error(writers_schema, error, buffer_encoder)
    except schema.AvroException as e:
        error = AvroRemoteException(str(e))
        # Drop any partially written call response but keep the handshake,
        # then write the system error into the buffer that is returned.
        # (Writing it through an encoder over a fresh StringIO would
        # silently discard the error.)
        buffer_writer.seek(response_start)
        buffer_writer.truncate()
        META_WRITER.write(response_metadata, buffer_encoder)
        buffer_encoder.write_boolean(True)
        self.write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
    return buffer_writer.getvalue()