def decode(cls, data, bytes_to_read=None):
    """Decode a message set into a list of (offset, size, message) tuples.

    When bytes_to_read is None, the byte count is first read from data as
    an Int32; callers decoding compressed messages are expected to pass it
    explicitly (via the message size). A trailing entry that cannot be
    decoded is reported as (None, None, PartialMessage()).
    """
    stream = io.BytesIO(data) if isinstance(data, bytes) else data
    remaining = Int32.decode(stream) if bytes_to_read is None else bytes_to_read

    # If FetchRequest max_bytes is smaller than the available message set,
    # the server returns partial data for the final message. Work on a
    # bounded copy so decoding can never read past this message set.
    window = io.BytesIO(stream.read(remaining))

    decoded = []
    while remaining:
        try:
            offset = Int64.decode(window)
            payload = Bytes.decode(window)
            size = len(payload)
            # 8-byte offset + 4-byte length prefix + message body
            remaining -= 8 + 4 + size
            entry = (offset, size, Message.decode(payload))
        except ValueError:
            # PartialMessage to signal that max_bytes may be too small
            decoded.append((None, None, PartialMessage()))
            break
        decoded.append(entry)
    return decoded
def decode(cls, data, bytes_to_read=None):
    """Return the (offset, size, message) tuples encoded in data.

    data may be raw bytes or a readable buffer. If bytes_to_read is not
    supplied it is decoded from data as an Int32 (compressed messages
    should supply it, via the message size). Decoding stops at the first
    entry that raises ValueError, which is recorded as
    (None, None, PartialMessage()).
    """
    if isinstance(data, bytes):
        data = io.BytesIO(data)
    if bytes_to_read is None:
        bytes_to_read = Int32.decode(data)

    # The server may return a truncated final message when the fetch
    # max_bytes is smaller than the available message set, so decode from
    # an internal buffer limited to bytes_to_read to avoid over-reading.
    bounded = io.BytesIO(data.read(bytes_to_read))

    results = []
    left = bytes_to_read
    while left:
        try:
            msg_offset = Int64.decode(bounded)
            body = Bytes.decode(bounded)
            body_len = len(body)
            left -= 8 + 4 + body_len
            message = Message.decode(body)
        except ValueError:
            # PartialMessage to signal that max_bytes may be too small
            results.append((None, None, PartialMessage()))
            break
        else:
            results.append((msg_offset, body_len, message))
    return results
def _process_response(self, read_buffer):
    """Decode one response from read_buffer and resolve its in-flight request.

    The oldest in-flight request is popped, the correlation id is read from
    read_buffer as an Int32 and compared with the request's id, and the
    rest of the buffer is decoded with the request's response type.

    Returns:
        The decoded response, or None when the correlation ids do not match
        (the request future is failed and the connection is closed).
    """
    assert not self._processing, 'Recursion not supported'
    self._processing = True
    try:
        ifr = self.in_flight_requests.popleft()

        # verify send/recv correlation ids match
        recv_correlation_id = Int32.decode(read_buffer)

        # 0.8.2 quirk
        if (self.config['api_version'] == (0, 8, 2) and
                ifr.response_type is GroupCoordinatorResponse[0] and
                ifr.correlation_id != 0 and
                recv_correlation_id == 0):
            log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
                        ' correlation id does not match request. This'
                        ' should go away once at least one topic has been'
                        ' initialized on the broker')

        elif ifr.correlation_id != recv_correlation_id:
            error = Errors.CorrelationIdError(
                '%s: Correlation ids do not match: sent %d, recv %d'
                % (str(self), ifr.correlation_id, recv_correlation_id))
            ifr.future.failure(error)
            self.close()
            return None

        # decode response
        response = ifr.response_type.decode(read_buffer)
        log.debug('%s Response %d: %s', self, ifr.correlation_id, response)
        ifr.future.success(response)
        return response
    finally:
        # Release the guard on every exit path; otherwise an exception
        # raised while decoding would leave it set and make the assert
        # above fail on every subsequent call.
        self._processing = False
def _process_response(self, read_buffer):
    """Decode one response from read_buffer and resolve its in-flight request.

    The oldest in-flight request is popped, the correlation id is read from
    read_buffer as an Int32 and compared with the request's id, and the
    rest of the buffer is decoded with the request's response type.

    Returns:
        The decoded response, or None when the correlation ids do not match
        (the request future is failed and the connection is closed).

    Raises:
        Errors.KafkaError: on the Kafka 0.8.2 GroupCoordinatorResponse quirk
            (received correlation id 0).
    """
    assert not self._processing, 'Recursion not supported'
    self._processing = True
    try:
        ifr = self.in_flight_requests.popleft()

        # verify send/recv correlation ids match
        recv_correlation_id = Int32.decode(read_buffer)

        # 0.8.2 quirk
        if (self.config['api_version'] == (0, 8, 2) and
                ifr.response_type is GroupCoordinatorResponse and
                recv_correlation_id == 0):
            raise Errors.KafkaError(
                'Kafka 0.8.2 quirk -- try creating a topic first')

        elif ifr.correlation_id != recv_correlation_id:
            error = Errors.CorrelationIdError(
                'Correlation ids do not match: sent %d, recv %d'
                % (ifr.correlation_id, recv_correlation_id))
            # NOTE(review): was `future.fail(...)`; the sibling response
            # handlers use `failure`, the counterpart of `success` below.
            ifr.future.failure(error)
            self.close()
            return None

        # decode response
        response = ifr.response_type.decode(read_buffer)
        log.debug('%s Response %d: %s', self, ifr.correlation_id, response)
        ifr.future.success(response)
        return response
    finally:
        # Release the guard on every exit path, including the quirk raise
        # above; otherwise the assert at the top fails on every later call.
        self._processing = False
def server_thread():
    """Serve a single size-prefixed request on 127.0.0.1:PORT.

    Listens, sets `event` once the socket is listening, accepts one client,
    reads a 4-byte size, a 4-byte id and the remaining `size - 4` bytes,
    and, when `need_response` is truthy, replies with the same id followed
    by the encoded `response`, prefixed by its length.

    Uses PORT, event, need_response and response from the enclosing scope.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as ss:
        ss.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        ss.bind(('127.0.0.1', PORT))
        ss.listen(1)
        event.set()  # the listening socket is ready for a client
        (cs, _) = ss.accept()
        with cs:
            size = Int32.decode(BytesIO(cs.recv(4, socket.MSG_WAITALL)))
            request_id = Int32.decode(BytesIO(cs.recv(4, socket.MSG_WAITALL)))
            # Drain the rest of the request; its content is not inspected.
            cs.recv(size - 4, socket.MSG_WAITALL)
            if need_response:
                message = b''.join([Int32.encode(request_id), response.encode()])
                # sendall: plain send() may transmit only part of the frame.
                cs.sendall(Int32.encode(len(message)) + message)
        # NOTE(review): presumably signals that the exchange is finished;
        # confirm the intended nesting level of this call.
        event.set()
def receive_bytes(self, data):
    """Feed raw network bytes into the parser and collect finished responses.

    Arguments:
        data (bytes): any length bytes received from a network connection
            to a kafka broker.

    Returns:
        list: whatever _process_response returned for each payload that
            was completed by this call, in arrival order.

    Raises:
        KafkaError: if a header or payload buffer holds more bytes than
            expected.
        Anything raised by _process_response.
    """
    responses = []
    pos = 0
    end = len(data)
    while pos < end:
        # Not receiving is the state of reading the payload header
        if not self._receiving:
            take = min(4 - self._header.tell(), end - pos)
            self._header.write(data[pos:pos + take])
            pos += take

            header_len = self._header.tell()
            if header_len == 4:
                self._header.seek(0)
                payload_size = Int32.decode(self._header)
                # reset buffer and switch state to receiving payload bytes
                self._rbuffer = KafkaBytes(payload_size)
                self._receiving = True
            elif header_len > 4:
                raise Errors.KafkaError(
                    "this should not happen - are you threading?")

        if self._receiving:
            expected = len(self._rbuffer)
            take = min(expected - self._rbuffer.tell(), end - pos)
            self._rbuffer.write(data[pos:pos + take])
            pos += take

            filled = self._rbuffer.tell()
            if filled > expected:
                raise Errors.KafkaError(
                    "Receive buffer has more bytes than expected?")
            if filled != expected:
                # payload still incomplete; wait for more data
                break

            self._receiving = False
            self._rbuffer.seek(0)
            responses.append(self._process_response(self._rbuffer))
            self._reset_buffer()
    return responses
def receive_bytes(self, data):
    """Process bytes received from the network.

    Each frame is a 4-byte Int32 size header followed by that many payload
    bytes. State is kept on the instance between calls, so data may split
    a frame at any point.

    Arguments:
        data (bytes): any length bytes received from a network connection
            to a kafka broker.

    Returns:
        list: the result of _process_response for every payload completed
            while consuming data.

    Raises:
        KafkaError: if a buffer ends up holding more bytes than expected.
        Anything raised by _process_response.
    """
    completed = []
    offset, length = 0, len(data)
    while offset < length:
        if not self._receiving:
            # Still collecting the 4-byte size header
            chunk = min(4 - self._header.tell(), length - offset)
            self._header.write(data[offset:offset+chunk])
            offset += chunk

            have = self._header.tell()
            if have > 4:
                raise Errors.KafkaError('this should not happen - are you threading?')
            if have == 4:
                self._header.seek(0)
                nbytes = Int32.decode(self._header)
                # reset buffer and switch state to receiving payload bytes
                self._rbuffer = KafkaBytes(nbytes)
                self._receiving = True

        if not self._receiving:
            # Header incomplete: nothing more can be done this iteration
            continue

        total = len(self._rbuffer)
        chunk = min(total - self._rbuffer.tell(), length - offset)
        self._rbuffer.write(data[offset:offset+chunk])
        offset += chunk

        staged = self._rbuffer.tell()
        if staged > total:
            raise Errors.KafkaError('Receive buffer has more bytes than expected?')
        if staged != total:
            break

        # Full payload staged: decode it and get ready for the next frame
        self._receiving = False
        self._rbuffer.seek(0)
        completed.append(self._process_response(self._rbuffer))
        self._reset_buffer()
    return completed
def _process_response(self, read_buffer):
    """Decode one response from read_buffer and resolve its in-flight request.

    The oldest in-flight request is popped (recording its round-trip time
    when sensors are configured), the correlation id is read from
    read_buffer as an Int32 and checked against the request, and the rest
    of the buffer is decoded with the request's response type.

    Returns:
        The decoded response, or None when the correlation ids do not match
        or the body raises ValueError while decoding (in both cases the
        request future is failed and the connection is closed).
    """
    assert not self._processing, 'Recursion not supported'
    self._processing = True
    try:
        ifr = self.in_flight_requests.popleft()

        if self._sensors:
            self._sensors.request_time.record(
                (time.time() - ifr.timestamp) * 1000)

        # verify send/recv correlation ids match
        recv_correlation_id = Int32.decode(read_buffer)

        # 0.8.2 quirk
        if (self.config['api_version'] == (0, 8, 2) and
                ifr.response_type is GroupCoordinatorResponse[0] and
                ifr.correlation_id != 0 and
                recv_correlation_id == 0):
            log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
                        ' Correlation ID does not match request. This'
                        ' should go away once at least one topic has been'
                        ' initialized on the broker.')

        elif ifr.correlation_id != recv_correlation_id:
            error = Errors.CorrelationIdError(
                '%s: Correlation IDs do not match: sent %d, recv %d'
                % (self, ifr.correlation_id, recv_correlation_id))
            ifr.future.failure(error)
            self.close(error)
            return None

        # decode response
        try:
            response = ifr.response_type.decode(read_buffer)
        except ValueError:
            # Log the whole raw buffer to help diagnose the bad payload
            read_buffer.seek(0)
            buf = read_buffer.read()
            log.error(
                '%s Response %d [ResponseType: %s Request: %s]:'
                ' Unable to decode %d-byte buffer: %r',
                self, ifr.correlation_id, ifr.response_type,
                ifr.request, len(buf), buf)
            error = Errors.UnknownError('Unable to decode response')
            ifr.future.failure(error)
            self.close(error)
            return None

        log.debug('%s Response %d: %s', self, ifr.correlation_id, response)
        ifr.future.success(response)
        return response
    finally:
        # Release the guard on every exit path; an exception other than
        # the handled ValueError would otherwise leave it set and make the
        # assert above fail on every subsequent call.
        self._processing = False
def _process_response(self, read_buffer):
    """Decode one response from read_buffer and resolve its in-flight request.

    The oldest in-flight request is popped (recording its round-trip time
    when sensors are configured), the correlation id is read from
    read_buffer as an Int32 and checked against the request, and the rest
    of the buffer is decoded with the request's response type.

    Returns:
        The decoded response, or None when the correlation ids do not match
        or the body raises ValueError while decoding (in both cases the
        request future is failed and the connection is closed).
    """
    assert not self._processing, 'Recursion not supported'
    self._processing = True
    try:
        ifr = self.in_flight_requests.popleft()

        if self._sensors:
            self._sensors.request_time.record((time.time() - ifr.timestamp) * 1000)

        # verify send/recv correlation ids match
        recv_correlation_id = Int32.decode(read_buffer)

        # 0.8.2 quirk
        if (self.config['api_version'] == (0, 8, 2) and
                ifr.response_type is GroupCoordinatorResponse[0] and
                ifr.correlation_id != 0 and
                recv_correlation_id == 0):
            log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
                        ' correlation id does not match request. This'
                        ' should go away once at least one topic has been'
                        ' initialized on the broker')

        elif ifr.correlation_id != recv_correlation_id:
            error = Errors.CorrelationIdError(
                '%s: Correlation ids do not match: sent %d, recv %d'
                % (str(self), ifr.correlation_id, recv_correlation_id))
            ifr.future.failure(error)
            self.close()
            return None

        # decode response
        try:
            response = ifr.response_type.decode(read_buffer)
        except ValueError:
            # Log the whole raw buffer to help diagnose the bad payload
            read_buffer.seek(0)
            buf = read_buffer.read()
            log.error('%s Response %d [ResponseType: %s Request: %s]:'
                      ' Unable to decode %d-byte buffer: %r', self,
                      ifr.correlation_id, ifr.response_type,
                      ifr.request, len(buf), buf)
            ifr.future.failure(Errors.UnknownError('Unable to decode response'))
            self.close()
            return None

        log.debug('%s Response %d: %s', self, ifr.correlation_id, response)
        ifr.future.success(response)
        return response
    finally:
        # Release the guard on every exit path; an exception other than
        # the handled ValueError would otherwise leave it set and make the
        # assert above fail on every subsequent call.
        self._processing = False
def _process_response(self, read_buffer):
    """Match a received payload to the oldest in-flight request and decode it.

    Reads the correlation id from read_buffer as an Int32, pops the oldest
    (correlation_id, request) pair, checks that the ids agree (tolerating
    the Kafka 0.8.2 GroupCoordinatorResponse quirk), then decodes the rest
    of the buffer with request.RESPONSE_TYPE.

    Returns:
        tuple: (correlation_id, response)

    Raises:
        CorrelationIdError: if nothing is in flight or the ids differ.
        KafkaProtocolError: if decoding the response raises ValueError.
    """
    received_id = Int32.decode(read_buffer)
    log.debug("Received correlation id: %d", received_id)

    if not self.in_flight_requests:
        raise Errors.CorrelationIdError(
            "No in-flight-request found for server response"
            " with correlation ID %d"
            % (received_id,))

    sent_id, request = self.in_flight_requests.popleft()

    # 0.8.2 quirk
    is_082_quirk = (
        received_id == 0
        and sent_id != 0
        and request.RESPONSE_TYPE is GroupCoordinatorResponse[0]
        and (self._api_version == (0, 8, 2) or self._api_version is None)
    )
    if is_082_quirk:
        log.warning("Kafka 0.8.2 quirk -- GroupCoordinatorResponse"
                    " Correlation ID does not match request. This"
                    " should go away once at least one topic has been"
                    " initialized on the broker.")
    elif sent_id != received_id:
        # return or raise?
        raise Errors.CorrelationIdError(
            "Correlation IDs do not match: sent %d, recv %d"
            % (sent_id, received_id))

    # decode response
    response_type = request.RESPONSE_TYPE
    log.debug("Processing response %s", response_type.__name__)
    try:
        decoded = response_type.decode(read_buffer)
    except ValueError:
        # Log the whole raw buffer to help diagnose the bad payload
        read_buffer.seek(0)
        raw = read_buffer.read()
        log.error(
            "Response %d [ResponseType: %s Request: %s]:"
            " Unable to decode %d-byte buffer: %r",
            sent_id, response_type, request, len(raw), raw)
        raise Errors.KafkaProtocolError("Unable to decode response")
    else:
        return (sent_id, decoded)
def encode(cls, items, prepend_size=True):
    """Encode (offset, message) pairs, or pass through a pre-encoded buffer.

    When items is an io.BytesIO or KafkaBytes buffer, its Int32 size prefix
    is read and that many bytes are returned; with prepend_size the 4-byte
    prefix itself is included as well. Otherwise every pair is encoded as
    Int64 offset + Bytes message, and the result is wrapped with
    Bytes.encode when prepend_size is true.
    """
    # RecordAccumulator encodes messagesets internally
    if isinstance(items, (io.BytesIO, KafkaBytes)):
        nbytes = Int32.decode(items)
        if not prepend_size:
            return items.read(nbytes)
        # rewind and return all the bytes
        items.seek(items.tell() - 4)
        return items.read(nbytes + 4)

    chunks = []
    for offset, message in items:
        chunks += [Int64.encode(offset), Bytes.encode(message)]
    body = b''.join(chunks)
    return Bytes.encode(body) if prepend_size else body
def _process_response(self, read_buffer):
    """Pair a received payload with the oldest in-flight request and decode it.

    The correlation id is read from read_buffer as an Int32 and compared
    with the id of the popped in-flight request; the Kafka 0.8.2
    GroupCoordinatorResponse quirk only produces a warning. The rest of
    the buffer is decoded with request.RESPONSE_TYPE.

    Returns:
        tuple: (correlation_id, response)

    Raises:
        CorrelationIdError: if nothing is in flight or the ids differ.
        KafkaProtocolError: if decoding the response raises ValueError.
    """
    recv_id = Int32.decode(read_buffer)
    log.debug('Received correlation id: %d', recv_id)

    pending = self.in_flight_requests
    if not pending:
        raise Errors.CorrelationIdError(
            'No in-flight-request found for server response'
            ' with correlation ID %d' % recv_id)

    (expected_id, request) = pending.popleft()

    # 0.8.2 quirk
    quirk_082 = (self._api_version == (0, 8, 2) and
                 request.RESPONSE_TYPE is GroupCoordinatorResponse[0] and
                 expected_id != 0 and
                 recv_id == 0)
    if quirk_082:
        log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
                    ' Correlation ID does not match request. This'
                    ' should go away once at least one topic has been'
                    ' initialized on the broker.')
    elif expected_id != recv_id:
        # return or raise?
        raise Errors.CorrelationIdError(
            'Correlation IDs do not match: sent %d, recv %d'
            % (expected_id, recv_id))

    # decode response
    decoder = request.RESPONSE_TYPE
    log.debug('Processing response %s', decoder.__name__)
    try:
        result = (expected_id, decoder.decode(read_buffer))
    except ValueError:
        # Log the whole raw buffer to help diagnose the bad payload
        read_buffer.seek(0)
        contents = read_buffer.read()
        log.error('Response %d [ResponseType: %s Request: %s]:'
                  ' Unable to decode %d-byte buffer: %r',
                  expected_id, decoder, request, len(contents), contents)
        raise Errors.KafkaProtocolError('Unable to decode response')
    return result
def send(request, wait_response=True):
    """Send one size-prefixed request to 127.0.0.1:PORT.

    A RequestHeader carrying the module-level correlation_id (incremented
    after use) and client id 'test' is encoded in front of the request.
    When wait_response is true, a 4-byte size and that many further bytes
    are read from the socket and discarded.
    """
    global correlation_id
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect(('127.0.0.1', PORT))
        header = RequestHeader(
            request=request, correlation_id=correlation_id, client_id='test'
        )
        correlation_id += 1
        message = b''.join([header.encode(), request.encode()])
        # sendall: plain send() may transmit only part of the frame.
        s.sendall(Int32.encode(len(message)) + message)
        if wait_response:
            raw_size = s.recv(4, socket.MSG_WAITALL)
            response_size = Int32.decode(BytesIO(raw_size))
            # The response body is drained but not inspected.
            s.recv(response_size, socket.MSG_WAITALL)
def recv(self):
    """Non-blocking network receive.

    Reads a 4-byte size header and then the payload from the socket,
    keeping partial progress on the instance between calls.

    Returns:
        The result of _process_response once a full payload has been
        staged; None when no complete response is available yet, or when
        the connection was not usable or had to be closed.
    """
    assert not self._processing, 'Recursion not supported'
    if not self.connected():
        log.warning('%s cannot recv: socket not connected', self)
        # If requests are pending, we should close the socket and
        # fail all the pending request futures
        if self.in_flight_requests:
            self.close()
        return None

    elif not self.in_flight_requests:
        log.warning('%s: No in-flight-requests to recv', self)
        return None

    elif self._requests_timed_out():
        log.warning('%s timed out after %s ms. Closing connection.',
                    self, self.config['request_timeout_ms'])
        self.close(error=Errors.RequestTimedOutError(
            'Request timed out after %s ms' %
            self.config['request_timeout_ms']))
        return None

    # Not receiving is the state of reading the payload header
    if not self._receiving:
        try:
            bytes_to_read = 4 - self._rbuffer.tell()
            data = self._sock.recv(bytes_to_read)
            # We expect socket.recv to raise an exception if there is not
            # enough data to read the full bytes_to_read
            # but if the socket is disconnected, we will get empty data
            # without an exception raised
            if not data:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(data)
        except ssl.SSLWantReadError:
            return None
        except ConnectionError as e:
            if six.PY2 and e.errno == errno.EWOULDBLOCK:
                return None
            log.exception('%s: Error receiving 4-byte payload header -'
                          ' closing socket', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        if self._rbuffer.tell() == 4:
            self._rbuffer.seek(0)
            self._next_payload_bytes = Int32.decode(self._rbuffer)
            # reset buffer and switch state to receiving payload bytes
            self._rbuffer.seek(0)
            self._rbuffer.truncate()
            self._receiving = True
        elif self._rbuffer.tell() > 4:
            raise Errors.KafkaError('this should not happen - are you threading?')

    if self._receiving:
        staged_bytes = self._rbuffer.tell()
        try:
            bytes_to_read = self._next_payload_bytes - staged_bytes
            data = self._sock.recv(bytes_to_read)
            # We expect socket.recv to raise an exception if there is not
            # enough data to read the full bytes_to_read
            # but if the socket is disconnected, we will get empty data
            # without an exception raised.
            # A zero-length payload makes recv(0) return empty data too,
            # so only treat it as a disconnect when bytes were expected.
            if bytes_to_read and not data:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(data)
        except ssl.SSLWantReadError:
            return None
        except ConnectionError as e:
            # Extremely small chance that we have exactly 4 bytes for a
            # header, but nothing to read in the body yet
            if six.PY2 and e.errno == errno.EWOULDBLOCK:
                return None
            log.exception('%s: Error in recv', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        staged_bytes = self._rbuffer.tell()
        if staged_bytes > self._next_payload_bytes:
            self.close(error=Errors.KafkaError('Receive buffer has more bytes than expected?'))

        if staged_bytes != self._next_payload_bytes:
            return None

        self._receiving = False
        self._next_payload_bytes = 0
        self._rbuffer.seek(0)
        response = self._process_response(self._rbuffer)
        self._rbuffer.seek(0)
        self._rbuffer.truncate()
        return response
def _recv(self):
    """Read from the socket and return a processed response once complete.

    Works in two phases tracked by self._receiving: first the 4-byte size
    header, then the payload of self._next_payload_bytes bytes. Partial
    progress is kept in self._rbuffer between calls.

    Returns:
        The result of _process_response when a full payload is staged;
        None when more data is needed or the connection was closed.
    """
    if not self._receiving:
        # Header phase: not receiving is the state of reading the
        # payload header
        try:
            wanted = 4 - self._rbuffer.tell()
            chunk = self._sock.recv(wanted)
            # socket.recv is expected to raise when the requested bytes
            # are not available; empty data without an exception means
            # the socket was disconnected
            if not chunk:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(chunk)
        except ssl.SSLWantReadError:
            return None
        except ConnectionError as e:
            would_block = six.PY2 and e.errno == errno.EWOULDBLOCK
            if would_block:
                return None
            log.exception('%s: Error receiving 4-byte payload header -'
                          ' closing socket', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        header_len = self._rbuffer.tell()
        if header_len > 4:
            raise Errors.KafkaError('this should not happen - are you threading?')
        if header_len == 4:
            self._rbuffer.seek(0)
            self._next_payload_bytes = Int32.decode(self._rbuffer)
            # reset buffer and switch state to receiving payload bytes
            self._rbuffer.seek(0)
            self._rbuffer.truncate()
            self._receiving = True

    if self._receiving:
        # Payload phase
        already_staged = self._rbuffer.tell()
        try:
            wanted = self._next_payload_bytes - already_staged
            chunk = self._sock.recv(wanted)
            # Empty data only signals a disconnect when bytes were
            # actually requested
            if wanted and not chunk:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(chunk)
        except ssl.SSLWantReadError:
            return None
        except ConnectionError as e:
            # Extremely small chance that we have exactly 4 bytes for a
            # header, but nothing to read in the body yet
            would_block = six.PY2 and e.errno == errno.EWOULDBLOCK
            if would_block:
                return None
            log.exception('%s: Error in recv', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        now_staged = self._rbuffer.tell()
        expected = self._next_payload_bytes
        if now_staged > expected:
            self.close(error=Errors.KafkaError('Receive buffer has more bytes than expected?'))
        if now_staged != expected:
            return None

        self._receiving = False
        self._next_payload_bytes = 0
        if self._sensors:
            # 4 accounts for the size header that preceded the payload
            self._sensors.bytes_received.record(4 + self._rbuffer.tell())
        self._rbuffer.seek(0)
        result = self._process_response(self._rbuffer)
        self._rbuffer.seek(0)
        self._rbuffer.truncate()
        return result
def recv(self, timeout=0):
    """Non-blocking network receive.

    Waits up to timeout seconds (via select) for the socket to become
    readable, then reads a 4-byte size header followed by the payload,
    keeping partial progress on the instance between calls.

    Returns:
        The result of _process_response once a full payload has been
        staged; None when no complete response is available yet, or when
        the connection was not usable or had to be closed.
    """
    assert not self._processing, 'Recursion not supported'
    if not self.connected():
        log.warning('%s cannot recv: socket not connected', self)
        # If requests are pending, we should close the socket and
        # fail all the pending request futures
        if self.in_flight_requests:
            self.close()
        return None

    elif not self.in_flight_requests:
        log.warning('%s: No in-flight-requests to recv', self)
        return None

    elif self._requests_timed_out():
        log.warning('%s timed out after %s ms. Closing connection.',
                    self, self.config['request_timeout_ms'])
        self.close(error=Errors.RequestTimedOutError(
            'Request timed out after %s ms' %
            self.config['request_timeout_ms']))
        return None

    readable, _, _ = select([self._sock], [], [], timeout)
    if not readable:
        return None

    # Not receiving is the state of reading the payload header
    if not self._receiving:
        try:
            # An extremely small, but non-zero, probability that there are
            # more than 0 but not yet 4 bytes available to read
            bytes_to_read = 4 - self._rbuffer.tell()
            data = self._sock.recv(bytes_to_read)
            # A readable socket that yields empty data has been
            # disconnected; without this check every later call would
            # select it as readable again and return None.
            if not data:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(data)
        except ConnectionError as e:
            if six.PY2 and e.errno == errno.EWOULDBLOCK:
                # This shouldn't happen after selecting above
                # but just in case
                return None
            log.exception('%s: Error receiving 4-byte payload header -'
                          ' closing socket', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        if self._rbuffer.tell() == 4:
            self._rbuffer.seek(0)
            self._next_payload_bytes = Int32.decode(self._rbuffer)
            # reset buffer and switch state to receiving payload bytes
            self._rbuffer.seek(0)
            self._rbuffer.truncate()
            self._receiving = True
        elif self._rbuffer.tell() > 4:
            raise Errors.KafkaError('this should not happen - are you threading?')

    if self._receiving:
        staged_bytes = self._rbuffer.tell()
        try:
            bytes_to_read = self._next_payload_bytes - staged_bytes
            data = self._sock.recv(bytes_to_read)
            # Empty data means a disconnect only when bytes were actually
            # requested: a zero-length payload makes recv(0) return empty
            # data on a healthy socket.
            if bytes_to_read and not data:
                log.error('%s: socket disconnected', self)
                self.close(error=Errors.ConnectionError('socket disconnected'))
                return None
            self._rbuffer.write(data)
        except ConnectionError as e:
            # Extremely small chance that we have exactly 4 bytes for a
            # header, but nothing to read in the body yet
            if six.PY2 and e.errno == errno.EWOULDBLOCK:
                return None
            log.exception('%s: Error in recv', self)
            self.close(error=Errors.ConnectionError(e))
            return None
        except BlockingIOError:
            if six.PY3:
                return None
            raise

        staged_bytes = self._rbuffer.tell()
        if staged_bytes > self._next_payload_bytes:
            self.close(error=Errors.KafkaError('Receive buffer has more bytes than expected?'))

        if staged_bytes != self._next_payload_bytes:
            return None

        self._receiving = False
        self._next_payload_bytes = 0
        self._rbuffer.seek(0)
        response = self._process_response(self._rbuffer)
        self._rbuffer.seek(0)
        self._rbuffer.truncate()
        return response