class Consumer(object):
    """A simple Kafka consumer implementation

    This consumer consumes a single partition from a single topic, optionally
    automatically committing offsets.  Use it as follows:

      * Create an instance of :class:`afkak.KafkaClient` with cluster
        connectivity details.
      * Create the :class:`Consumer`, supplying the client, topic, partition,
        processor function, and optionally fetch specifics, a consumer group,
        and a commit policy.
      * Call :meth:`.start` with the offset within the partition at which to
        start consuming messages.  See :meth:`.start` for details.
      * Process the messages in your :attr:`.processor` callback, returning a
        :class:`~twisted.internet.defer.Deferred` to provide backpressure as
        needed.
      * Once processing resolves, :attr:`.processor` will be called again with
        the next batch of messages.
      * When desired, call :meth:`.stop` on the :class:`Consumer` to halt
        calls to the :attr:`processor` function and cancel any outstanding
        requests to the Kafka cluster.

    A :class:`Consumer` may be restarted once stopped.

    :ivar client:
        Connected :class:`KafkaClient` for submitting requests to the Kafka
        cluster.
    :ivar bytes topic:
        The topic from which to consume messages.
    :ivar int partition:
        The partition from which to consume.
    :ivar callable processor:
        The callback function to which the consumer and lists of messages
        (:class:`afkak.common.SourcedMessage`) will be submitted for
        processing.  The function may return a
        :class:`~twisted.internet.defer.Deferred` and will not be called
        again until this Deferred resolves.
    :ivar bytes consumer_group:
        Optional consumer group ID for committing offsets of processed
        messages back to Kafka.
    :ivar bytes commit_metadata:
        Optional metadata to store with offset commits.
    :ivar int auto_commit_every_n:
        Number of messages after which the consumer will automatically commit
        the offset of the last processed message to Kafka.  Zero disables;
        defaults to :data:`AUTO_COMMIT_MSG_COUNT`.
    :ivar int auto_commit_every_ms:
        Time interval in milliseconds after which the consumer will
        automatically commit the offset of the last processed message to
        Kafka.  Zero disables; defaults to :data:`AUTO_COMMIT_INTERVAL`.
    :ivar int fetch_size_bytes:
        Number of bytes to request in a :class:`FetchRequest`.  Kafka will
        defer fulfilling the request until at least this many bytes can be
        returned.
    :ivar int fetch_max_wait_time:
        Max number of milliseconds the server should wait for that many
        bytes.
    :ivar int buffer_size:
        Initial number of bytes to tell Kafka we have available (default
        128KiB).  On a too-small-buffer error this is raised by a factor of
        16 up to 1MiB, then doubled, bounded by :attr:`max_buffer_size`
        (if set).
    :ivar int max_buffer_size:
        Max number of bytes to tell Kafka we have available.  `None` means no
        limit (the default).  Must be larger than the largest message we will
        find in our topic/partitions.
    :ivar float request_retry_init_delay:
        Number of seconds to wait before retrying a failed request to Kafka.
    :ivar float request_retry_max_delay:
        Maximum number of seconds to wait before retrying a failed request to
        Kafka (the delay is increased on each failure and reset to the
        initial delay upon success).
    :ivar int request_retry_max_attempts:
        Maximum number of attempts to make for any request.  Default of zero
        means retry forever; other values must be positive and indicate the
        number of attempts to make before returning failure.
    :ivar int auto_offset_reset:
        What action should be taken when the broker responds to a fetch
        request with `OffsetOutOfRangeError`?

        - `OFFSET_EARLIEST`: request the oldest available messages.  The
          consumer will read every message in the topic.
        - `OFFSET_LATEST`: request the most recent messages (this is the Java
          consumer's default).  The consumer will read messages once new
          messages are produced to the topic.
        - `None`: fail on `OffsetOutOfRangeError` (Afkak's default).  The
          `Deferred` returned by :meth:`Consumer.start()` will errback.  The
          caller may call :meth:`~.start()` again with the desired offset.

        The broker returns `OffsetOutOfRangeError` when the client requests
        an offset that isn't valid.  This may mean that the requested offset
        no longer exists, e.g. if it was removed due to age.
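
    A minimal usage sketch follows; the broker address, topic name, and the
    ``print``-based processor are illustrative assumptions, not part of this
    module::

        from afkak.client import KafkaClient
        from afkak.common import OFFSET_EARLIEST
        from afkak.consumer import Consumer

        def process(consumer, messages):
            # messages is a list of SourcedMessage; return a Deferred here
            # to make the consumer wait (backpressure) before the next call.
            for m in messages:
                print(m.offset, m.message.value)

        client = KafkaClient('kafka.example.com:9092')
        consumer = Consumer(client, b'example-topic', 0, process)
        d = consumer.start(OFFSET_EARLIEST)
        # d fires when the consumer stops; call consumer.stop() or
        # consumer.shutdown() when done.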
""" def __init__(self, client, topic, partition, processor, consumer_group=None, commit_metadata=None, auto_commit_every_n=None, auto_commit_every_ms=None, fetch_size_bytes=FETCH_MIN_BYTES, fetch_max_wait_time=FETCH_MAX_WAIT_TIME, buffer_size=FETCH_BUFFER_SIZE_BYTES, max_buffer_size=None, request_retry_init_delay=REQUEST_RETRY_MIN_DELAY, request_retry_max_delay=REQUEST_RETRY_MAX_DELAY, request_retry_max_attempts=0): # Store away parameters self.client = client # KafkaClient self.topic = topic = _coerce_topic(topic) self.partition = partition # The partition within the topic we consume self.processor = processor # The callback we call with the msg list # Commit related parameters (Ensure the attr. exist, even if None) if consumer_group is not None: consumer_group = _coerce_consumer_group(consumer_group) self.consumer_group = consumer_group self.commit_metadata = commit_metadata if commit_metadata is not None and not isinstance(commit_metadata, bytes): raise TypeError('commit_metadata={!r} should be bytes'.format( commit_metadata)) self.auto_commit_every_n = None self.auto_commit_every_s = None if consumer_group: # Auto committing is possible... if auto_commit_every_n is None: auto_commit_every_n = AUTO_COMMIT_MSG_COUNT if auto_commit_every_ms is None: auto_commit_every_ms = AUTO_COMMIT_INTERVAL if not isinstance(auto_commit_every_n, Integral): raise ValueError('auto_commit_every_n parameter must be ' 'subtype of Integral') if not isinstance(auto_commit_every_ms, Integral): raise ValueError('auto_commit_every_ms parameter must be ' 'subtype of Integral') if auto_commit_every_ms < 0 or auto_commit_every_n < 0: raise ValueError('auto_commit_every_ms and auto_commit_every_n' ' must be non-negative') self.auto_commit_every_n = auto_commit_every_n self.auto_commit_every_s = float(auto_commit_every_ms) / 1000 else: if auto_commit_every_ms or auto_commit_every_n: raise ValueError('An auto_commit_every_x argument set without ' 'specifying consumer_group') # Fetch related instance variables self.fetch_min_bytes = fetch_size_bytes self.fetch_max_wait_time = int(fetch_max_wait_time) self.buffer_size = buffer_size self.max_buffer_size = max_buffer_size # request retry timing self.retry_delay = float(request_retry_init_delay) # fetch only self.retry_init_delay = float(request_retry_init_delay) self.retry_max_delay = float(request_retry_max_delay) self.request_retry_max_attempts = int(request_retry_max_attempts) if (not isinstance(request_retry_max_attempts, Integral) or request_retry_max_attempts < 0): raise ValueError( 'request_retry_max_attempts must be non-negative integer') self._fetch_attempt_count = 1 # # Internal state tracking attributes self._fetch_offset = None # We don't know at what offset to fetch yet self._last_processed_offset = None # Last msg processed offset self._last_committed_offset = None # The last offset stored in Kafka self._stopping = False # We're not stopping yet... 
        self._shuttingdown = False  # We're not shutting down either
        self._shutdown_d = None  # Deferred for tracking shutdown request
        self._commit_looper = None  # Looping call for auto-commit
        self._commit_looper_d = None  # Deferred for running looping call
        self._commit_ds = []  # Deferreds to notify when commit completes
        self._commit_req = None  # Track outstanding commit request
        # For tracking various async operations
        self._start_d = None  # Deferred for alerting user of errors
        self._request_d = None  # Outstanding KafkaClient request deferred
        self._retry_call = None  # IDelayedCall object for delayed retries
        self._commit_call = None  # IDelayedCall for delayed commit retries
        self._msg_block_d = None  # Deferred for each block of messages
        self._processor_d = None  # Deferred for a result from processor
        self._state = '[initialized]'  # Keep track of state for debugging

        # Check parameters for sanity
        if max_buffer_size is not None and buffer_size > max_buffer_size:
            raise ValueError("buffer_size (%d) is greater than "
                             "max_buffer_size (%d)" %
                             (buffer_size, max_buffer_size))
        if not isinstance(self.partition, Integral):
            raise ValueError('partition parameter must be a subtype of '
                             'Integral')

    def __repr__(self):
        return '<{} {} topic={}, partition={}, processor={}>'.format(
            self.__class__.__name__,
            self._state,
            self.topic,
            self.partition,
            self.processor,
        )

    def start(self, start_offset):
        """
        Starts fetching messages from Kafka and delivering them to the
        :attr:`.processor` function.

        :param int start_offset:
            The offset within the partition from which to start fetching.
            Special values include: :const:`OFFSET_EARLIEST`,
            :const:`OFFSET_LATEST`, and :const:`OFFSET_COMMITTED`.  If the
            supplied offset is :const:`OFFSET_EARLIEST` or
            :const:`OFFSET_LATEST` the :class:`Consumer` will use the
            OffsetRequest Kafka API to retrieve the actual offset used for
            fetching.  In the case :const:`OFFSET_COMMITTED` is used,
            `commit_policy` MUST be set on the Consumer, and the Consumer
            will use the OffsetFetchRequest Kafka API to retrieve the actual
            offset used for fetching.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which will resolve
            successfully when the consumer is cleanly stopped, or with a
            failure if the :class:`Consumer` encounters an error from which
            it is unable to recover.

        :raises: :exc:`RestartError` if already running.
        """
        # Have we been started already, and not stopped?
        if self._start_d is not None:
            raise RestartError("Start called on already-started consumer")

        # Keep track of state for debugging
        self._state = '[started]'

        # Create and return a deferred for alerting on errors/stoppage
        start_d = self._start_d = Deferred()

        # Start a new fetch request, possibly just for the starting offset
        self._fetch_offset = start_offset
        self._do_fetch()

        # Set up the auto-commit timer, if needed
        if self.consumer_group and self.auto_commit_every_s:
            self._commit_looper = LoopingCall(self._auto_commit)
            self._commit_looper.clock = self.client.reactor
            self._commit_looper_d = self._commit_looper.start(
                self.auto_commit_every_s, now=False)
            self._commit_looper_d.addCallbacks(self._commit_timer_stopped,
                                               self._commit_timer_failed)
        return start_d

    def shutdown(self):
        """Gracefully shutdown the consumer

        Consumer will complete any outstanding processing, commit its current
        offsets (if so configured) and stop.

        Returns a deferred which fires with a tuple of (last processed
        offset, last committed offset) if it was able to commit successfully,
        errbacks with the commit failure, if any, or with a
        :exc:`RestopError` failure if the consumer is not running.
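
        A sketch of a graceful stop; the ``on_shutdown`` callback is
        illustrative, not part of this module::

            def on_shutdown(offsets):
                last_processed, last_committed = offsets
                log.info('processed=%r committed=%r',
                         last_processed, last_committed)

            consumer.shutdown().addCallback(on_shutdown)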
""" def _handle_shutdown_commit_success(result): """Handle the result of the commit attempted by shutdown""" self._shutdown_d, d = None, self._shutdown_d self.stop() self._shuttingdown = False # Shutdown complete d.callback((self._last_processed_offset, self._last_committed_offset)) def _handle_shutdown_commit_failure(failure): """Handle failure of commit() attempted by shutdown""" if failure.check(OperationInProgress): failure.value.deferred.addCallback(_commit_and_stop) return self._shutdown_d, d = None, self._shutdown_d self.stop() self._shuttingdown = False # Shutdown complete d.errback(failure) def _commit_and_stop(result): """Commit the current offsets (if needed) and stop the consumer""" if not self.consumer_group: # No consumer group, no committing return _handle_shutdown_commit_success(None) # Need to commit prior to stopping self.commit().addCallbacks(_handle_shutdown_commit_success, _handle_shutdown_commit_failure) # If we're not running, return an failure if self._start_d is None: return fail(Failure( RestopError("Shutdown called on non-running consumer"))) # If we're called multiple times, return a failure if self._shutdown_d: return fail(Failure( RestopError("Shutdown called more than once."))) # Set our _shuttingdown flag, so our _process_message routine will stop # feeding new messages to the processor, and fetches won't be retried self._shuttingdown = True # Keep track of state for debugging self._state = '[shutting down]' # Create a deferred to track the shutdown self._shutdown_d = d = Deferred() # Are we waiting for the processor to complete? If so, when it's done, # commit our offsets and stop. if self._processor_d: self._processor_d.addCallback(_commit_and_stop) else: # No need to wait for the processor, we can commit and stop now _commit_and_stop(None) # return the deferred return d def stop(self): """ Stop the consumer and return offset of last processed message. This cancels all outstanding operations. Also, if the deferred returned by `start` hasn't been called, it is called with a tuple consisting of the last processed offset and the last committed offset. :raises: :exc:`RestopError` if the :class:`Consumer` is not running. """ if self._start_d is None: raise RestopError("Stop called on non-running consumer") self._stopping = True # Keep track of state for debugging self._state = '[stopping]' # Are we waiting for a request to come back? if self._request_d: self._request_d.cancel() # Are we working our way through a block of messages? if self._msg_block_d: # Need to add a cancel handler... _msg_block_d, self._msg_block_d = self._msg_block_d, None _msg_block_d.addErrback(lambda fail: fail.trap(CancelledError)) _msg_block_d.cancel() # Are we waiting for the processor to complete? if self._processor_d: self._processor_d.cancel() # Are we waiting to retry a request? if self._retry_call: self._retry_call.cancel() # Are we waiting on a commit request? if self._commit_ds: while self._commit_ds: d = self._commit_ds.pop() d.cancel() if self._commit_req: self._commit_req.cancel() # Are we waiting to retry a commit? if self._commit_call: self._commit_call.cancel() # Do we have an auto-commit looping call? 
        if self._commit_looper is not None:
            self._commit_looper.stop()
        # Done stopping
        self._stopping = False
        # Keep track of state for debugging
        self._state = '[stopped]'

        # Clear and possibly callback our start() Deferred
        self._start_d, d = None, self._start_d
        if not d.called:
            d.callback((self._last_processed_offset,
                        self._last_committed_offset))

        # Return the offset of the message we last processed
        return self._last_processed_offset

    def commit(self):
        """
        Commit the offset of the message we last processed if it is different
        from what we believe is the last offset committed to Kafka.

        .. note::

            It is possible to commit a smaller offset than Kafka has stored.
            This is by design, so we can reprocess a Kafka message stream if
            desired.

        On error, will retry according to :attr:`request_retry_max_attempts`
        (by default, forever).

        If called while a commit operation is in progress, and new messages
        have been processed since the last request was sent, then the commit
        will fail with :exc:`OperationInProgress`.  The
        :exc:`OperationInProgress` exception wraps a
        :class:`~twisted.internet.defer.Deferred` which fires when the
        outstanding commit operation completes.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which resolves with
            the committed offset when the operation has completed.  It will
            resolve immediately if the current offset and the last committed
            offset do not differ.
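
        A sketch of retrying when a commit is already in flight; the callback
        names are illustrative, not part of this module::

            def commit_done(offset):
                log.debug('committed through offset %r', offset)

            def commit_blocked(failure):
                failure.trap(OperationInProgress)
                # Wait for the outstanding commit, then commit again.
                return failure.value.deferred.addCallback(
                    lambda _: consumer.commit())

            consumer.commit().addCallbacks(commit_done, commit_blocked)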
        """
        # Can't commit without a consumer_group
        if not self.consumer_group:
            return fail(Failure(InvalidConsumerGroupError(
                "Bad Group_id:{0!r}".format(self.consumer_group))))
        # Short circuit if we are 'up to date', or haven't processed anything
        if ((self._last_processed_offset is None) or
                (self._last_processed_offset == self._last_committed_offset)):
            return succeed(self._last_committed_offset)

        # If we're currently processing a commit we return a failure
        # with a deferred we'll fire when the in-progress one completes
        if self._commit_ds:
            d = Deferred()
            self._commit_ds.append(d)
            return fail(OperationInProgress(d))

        # OK, we have processed messages since our last commit attempt, and
        # we're not currently waiting on a commit request to complete:
        # Start a new one
        d = Deferred()
        self._commit_ds.append(d)

        # Send the request
        self._send_commit_request()

        # Reset the commit_looper here, rather than on success, to give more
        # stability to the commit interval.
        if self._commit_looper is not None:
            self._commit_looper.reset()

        # Return the deferred
        return d

    # # Private Methods # #

    def _retry_auto_commit(self, result, by_count=False):
        self._auto_commit(by_count)
        return result

    def _auto_commit(self, by_count=False):
        """Check if we should start a new commit operation and commit"""
        # Check if we are even supposed to do any auto-committing
        if (self._stopping or self._shuttingdown or (not self._start_d) or
                (self._last_processed_offset is None) or
                (not self.consumer_group) or
                (by_count and not self.auto_commit_every_n)):
            return

        # If we're auto_committing because the timer expired, or by count and
        # we don't have a record of our last_committed_offset, or we've
        # processed enough messages since our last commit, then try to commit
        if (not by_count or self._last_committed_offset is None or
                (self._last_processed_offset -
                 self._last_committed_offset) >= self.auto_commit_every_n):

            if not self._commit_ds:
                commit_d = self.commit()
                commit_d.addErrback(self._handle_auto_commit_error)
            else:
                # We're waiting on the last commit to complete, so add a
                # callback to be called when the current request completes
                d = Deferred()
                d.addCallback(self._retry_auto_commit, by_count)
                self._commit_ds.append(d)

    def _retry_fetch(self, after=None):
        """
        Schedule a delayed :meth:`_do_fetch` call after a failure

        :param float after:
            The delay in seconds after which to do the retried fetch.  If
            `None`, our internal :attr:`retry_delay` is used, and adjusted by
            :const:`REQUEST_RETRY_FACTOR`.
        """
        # Have we been told to stop or shutdown?  Then don't actually retry.
        if self._stopping or self._shuttingdown or self._start_d is None:
            # Stopping, or stopped already?  No more fetching.
            return
        if self._retry_call is None:
            if after is None:
                after = self.retry_delay
                self.retry_delay = min(self.retry_delay * REQUEST_RETRY_FACTOR,
                                       self.retry_max_delay)

            self._fetch_attempt_count += 1
            self._retry_call = self.client.reactor.callLater(
                after, self._do_fetch)

    def _handle_offset_response(self, response):
        """
        Handle responses to both OffsetRequest and OffsetFetchRequest, since
        they are similar enough.

        :param response:
            A tuple of a single OffsetFetchResponse or OffsetResponse
        """
        # Got a response, clear our outstanding request deferred
        self._request_d = None

        # Successful request, reset our retry delay, count, etc
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        response = response[0]
        if hasattr(response, 'offsets'):
            # It's a response to an OffsetRequest
            self._fetch_offset = response.offsets[0]
        else:
            # It's a response to an OffsetFetchRequest.  Make sure we got a
            # valid offset back: Kafka uses -1 to indicate that no committed
            # offset was retrieved.
            if response.offset == OFFSET_NOT_COMMITTED:
                self._fetch_offset = OFFSET_EARLIEST
            else:
                self._fetch_offset = response.offset + 1
            self._last_committed_offset = response.offset
        self._do_fetch()

    def _handle_offset_error(self, failure):
        """
        Retry the offset fetch request if appropriate.

        Once the :attr:`.retry_delay` reaches our :attr:`.retry_max_delay`,
        we log a warning.  This should perhaps be extended to abort sooner on
        certain errors.
        """
        # Outstanding request got errback'd, clear it
        self._request_d = None

        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching offset from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return
        # Decide how to log this failure...  If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other
        # time; debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching offset from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching offset from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _clear_processor_deferred(self, result):
        self._processor_d = None  # It has fired, we can clear it
        return result

    def _update_processed_offset(self, result, offset):
        self._last_processed_offset = offset
        self._auto_commit(by_count=True)

    def _clear_commit_req(self, result):
        self._commit_req = None  # It has fired, we can clear it
        return result

    def _update_committed_offset(self, result, offset):
        # Successful commit request completed
        self._last_committed_offset = offset
        self._deliver_commit_result(offset)
        return offset

    def _deliver_commit_result(self, result):
        # Let anyone waiting know the commit completed.  Handle the case
        # where they try to commit from the callback by preserving
        # self._commit_ds as a local, but clearing the attribute itself.
        commit_ds, self._commit_ds = self._commit_ds, []
        while commit_ds:
            d = commit_ds.pop()
            d.callback(result)

    def _send_commit_request(self, retry_delay=None, attempt=None):
        """Send a commit request with our last_processed_offset"""
        # If there's a _commit_call, and it's not active, clear it; it
        # probably just called us...
        if self._commit_call and not self._commit_call.active():
            self._commit_call = None

        # Make sure we only have one outstanding commit request at a time
        if self._commit_req is not None:
            raise OperationInProgress(self._commit_req)

        # Handle defaults
        if retry_delay is None:
            retry_delay = self.retry_init_delay
        if attempt is None:
            attempt = 1

        # Create a new OffsetCommitRequest with the latest processed offset
        commit_offset = self._last_processed_offset
        commit_request = OffsetCommitRequest(
            self.topic, self.partition, commit_offset,
            TIMESTAMP_INVALID, self.commit_metadata)
        log.debug("Committing off=%d grp=%s tpc=%s part=%s req=%r",
                  self._last_processed_offset, self.consumer_group,
                  self.topic, self.partition, commit_request)

        # Send the request, add our callbacks
        self._commit_req = d = self.client.send_offset_commit_request(
            self.consumer_group, [commit_request])
        d.addBoth(self._clear_commit_req)
        d.addCallbacks(
            self._update_committed_offset, self._handle_commit_error,
            callbackArgs=(commit_offset,),
            errbackArgs=(retry_delay, attempt))

    def _handle_commit_error(self, failure, retry_delay, attempt):
        """Retry the commit request, depending on failure type

        Depending on the type of the failure, we retry the commit request
        with the latest processed offset, or callback/errback
        self._commit_ds
        """
        # Check if we are stopping and the request was cancelled
        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return self._deliver_commit_result(self._last_committed_offset)

        # Check that the failure type is a Kafka error... this could maybe
        # be a tighter check to determine whether a retry will succeed...
        if not failure.check(KafkaError):
            log.error("Unhandleable failure during commit attempt: %r\n\t%r",
                      failure, failure.getBriefTraceback())
            return self._deliver_commit_result(failure)

        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                attempt >= self.request_retry_max_attempts):
            log.debug("%r: Exhausted attempts: %d to commit offset: %r",
                      self, self.request_retry_max_attempts, failure)
            return self._deliver_commit_result(failure)

        # Check the retry_delay to see if we should log at the higher level.
        # Using attempt % 2 gets us 1 warning/minute with default timings.
        if retry_delay < self.retry_max_delay or 0 == (attempt % 2):
            log.debug("%r: Failure committing offset to kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log alternately at
            # warn
            log.warning("%r: Still failing committing offset to kafka: %r",
                        self, failure)

        # Schedule a delayed call to retry the commit
        retry_delay = min(retry_delay * REQUEST_RETRY_FACTOR,
                          self.retry_max_delay)
        self._commit_call = self.client.reactor.callLater(
            retry_delay, self._send_commit_request, retry_delay, attempt + 1)

    def _handle_auto_commit_error(self, failure):
        if self._start_d is not None and not self._start_d.called:
            self._start_d.errback(failure)

    def _handle_processor_error(self, failure):
        """Handle a failure in the processing of a block of messages

        This method is called when the processor func fails while processing
        a block of messages.  Since we can't know how best to handle a
        processor failure, we just :func:`errback` our :func:`start` method's
        deferred to let our user know about the failure.
        """
        # Check if we're stopping/stopped and the errback of the processor
        # deferred is just the cancelling we initiated.  If so, we skip
        # notifying via the _start_d deferred, as it will be 'callback'd at
        # the end of stop()
        if not (self._stopping and failure.check(CancelledError)):
            if self._start_d:  # Make sure we're not already stopped
                self._start_d.errback(failure)

    def _handle_fetch_error(self, failure):
        """A fetch request resulted in an error.  Retry after our current
        delay.

        When a fetch error occurs, we check to see if the Consumer is being
        stopped, and if so just return, trapping the CancelledError.  If not,
        we check if the Consumer has a non-zero setting for
        :attr:`request_retry_max_attempts` and if so and we have reached that
        limit we errback() the Consumer's start() deferred with the failure.
        If not, we determine whether to log at debug or warning (we log at
        warning every other retry after backing off to the max retry delay,
        resulting in a warning message approximately once per minute with the
        default timings).  We then wait our current :attr:`retry_delay`, and
        retry the fetch.  We also increase our retry_delay by a factor of
        Apery's constant (1.20205) and note the failed fetch by incrementing
        :attr:`_fetch_attempt_count`.

        NOTE: this may retry forever.
        TODO: Possibly make this differentiate based on the failure
        """
        # The _request_d deferred has fired, clear it.
        self._request_d = None

        if failure.check(OffsetOutOfRangeError):
            if self.auto_offset_reset is None:
                self._start_d.errback(failure)
                return
            self._fetch_offset = self.auto_offset_reset

        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching messages from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return
        # Decide how to log this failure...  If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other
        # time; debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching messages from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching messages from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _handle_fetch_response(self, responses):
        """The callback handling the successful response from the fetch
        request.

        Delivers the message list to the processor, handles per-message
        errors (ConsumerFetchSizeTooSmall), and triggers another fetch
        request.

        If the processor is still processing the last batch of messages, we
        defer this processing until it's done.  Otherwise, we start another
        fetch request and submit the messages to the processor.
        """
        # Successful fetch, reset our retry delay
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        # Check to see if we are still processing the last block we fetched...
        if self._msg_block_d:
            # We are still working through the last block of messages...
            # We have to wait until it's done, then process this response
            self._msg_block_d.addCallback(
                lambda _: self._handle_fetch_response(responses))
            return

        # No ongoing processing, great, let's get some started.
        # Request no longer outstanding, clear the deferred tracker so we
        # can refetch
        self._request_d = None
        messages = []
        try:
            for resp in responses:  # We should really only ever get one...
                if resp.partition != self.partition:
                    log.warning(
                        "%r: Got response with partition: %r not our own: %r",
                        self, resp.partition, self.partition)
                    continue
                # resp.messages is a KafkaCodec._decode_message_set_iter
                # Note that 'message' here is really an OffsetAndMessage
                for message in resp.messages:
                    # Check for messages included which are from prior to our
                    # desired offset: can happen due to compressed message
                    # sets
                    if message.offset < self._fetch_offset:
                        log.debug(
                            'Skipping message at offset: %d, because its '
                            'offset is less than our fetch offset: %d.',
                            message.offset, self._fetch_offset)
                        continue
                    # Create a 'SourcedMessage' and add it to the messages
                    # list
                    messages.append(
                        SourcedMessage(
                            message=message.message,
                            offset=message.offset, topic=self.topic,
                            partition=self.partition))
                    # Update our notion of from where to fetch.
                    self._fetch_offset = message.offset + 1
        except ConsumerFetchSizeTooSmall:
            # A message was too large for us to receive, given our current
            # buffer size.  Grow it until it works, or we hit our max.
            # Grow by 16x up to 1MB (could result in 16MB buf), then by 2x
            factor = 2
            if self.buffer_size <= 2**20:
                factor = 16
            if self.max_buffer_size is None:
                # No limit, increase until we succeed or fail to alloc RAM
                self.buffer_size *= factor
            elif self.buffer_size < self.max_buffer_size:
                # Limited, but currently below the limit, so increase
                self.buffer_size = min(
                    self.buffer_size * factor, self.max_buffer_size)
            else:
                # We failed, and are already at our max.  Nothing we can do
                # but create a Failure and errback() our start() deferred
                log.error("Max fetch size %d too small", self.max_buffer_size)
                failure = Failure(
                    ConsumerFetchSizeTooSmall(
                        "Max buffer size:%d too small for message",
                        self.max_buffer_size))
                self._start_d.errback(failure)
                return

            log.debug(
                "Next message larger than fetch size, increasing "
                "to %d (x%d) and retrying", self.buffer_size, factor)
        finally:
            # If we were able to extract any messages, deliver them to the
            # processor now.
            if messages:
                self._msg_block_d = Deferred()
                self._process_messages(messages)

        # Start another fetch, if needed, but use callLater to avoid recursion
        self._retry_fetch(0)

    def _process_messages(self, messages):
        """Send messages to the `processor` callback to be processed

        In the case we have a commit policy, we send messages to the
        processor in blocks no bigger than auto_commit_every_n (if set).
        Otherwise, we send the entire message block to be processed.
        """
        # Have we been told to shutdown?
        if self._shuttingdown:
            return
        # Do we have any messages to process?
        if not messages:
            # No, we're done with this block.  If we had another fetch result
            # waiting, this callback will trigger the processing thereof.
            if self._msg_block_d:
                _msg_block_d, self._msg_block_d = self._msg_block_d, None
                _msg_block_d.callback(True)
            return
        # Yes, we've got some messages to process.
        # Default to processing the entire block...
        proc_block_size = sys.maxsize
        # Unless our auto commit_policy restricts us to process less
        if self.auto_commit_every_n:
            proc_block_size = self.auto_commit_every_n

        # Divide messages into two lists: one to process now, and remainder
        msgs_to_proc = messages[:proc_block_size]
        msgs_remainder = messages[proc_block_size:]
        # Call our processor callable and handle the possibility it returned
        # a deferred...
        last_offset = msgs_to_proc[-1].offset
        self._processor_d = d = maybeDeferred(self.processor, self,
                                              msgs_to_proc)
        log.debug('self.processor return: %r, last_offset: %r', d,
                  last_offset)
        # Once the processor completes, clear our _processor_d
        d.addBoth(self._clear_processor_deferred)
        # Record the offset of the last processed message and check
        # autocommit
        d.addCallback(self._update_processed_offset, last_offset)
        # If we were stopped, cancel the processor deferred.  Note, we have
        # to do this here, in addition to in stop(), because the processor
        # func itself could have called stop(), and then when it returned, we
        # re-set self._processor_d to the return of maybeDeferred().
        if self._stopping or self._start_d is None:
            d.cancel()
        else:
            # Setup to process the rest of our messages
            d.addCallback(lambda _: self._process_messages(msgs_remainder))
        # Add an error handler
        d.addErrback(self._handle_processor_error)

    def _do_fetch(self):
        """Send a fetch request if there isn't a request outstanding

        Sends a fetch request to the Kafka cluster to get messages at the
        current offset.  When the response comes back, if there are messages,
        it delivers them to the :attr:`processor` callback and initiates
        another fetch request.  If there is a recoverable error, the fetch is
        retried after :attr:`retry_delay`.

        In the case of an unrecoverable error, :func:`errback` is called on
        the :class:`Deferred` returned by :meth:`start()`.
        """
        # Check for outstanding request.
        if self._request_d:
            log.debug("_do_fetch: Outstanding request: %r", self._request_d)
            return

        # Cleanup our _retry_call, if we have one
        if self._retry_call is not None:
            if self._retry_call.active():
                self._retry_call.cancel()
            self._retry_call = None

        # Do we know our offset yet, or do we need to figure it out?
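        # Three cases, keyed on the special offset sentinels:
        #   OFFSET_EARLIEST / OFFSET_LATEST -> send an OffsetRequest to look
        #       up a concrete offset, then fetch from there.
        #   OFFSET_COMMITTED -> send an OffsetFetchRequest to look up the
        #       last offset committed for our consumer_group.
        #   any other value -> treat it as a concrete offset and send a
        #       FetchRequest for messages at that offset.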
        if (self._fetch_offset == OFFSET_EARLIEST or
                self._fetch_offset == OFFSET_LATEST):
            # We need to fetch the offset for our topic/partition
            offset_request = OffsetRequest(
                self.topic, self.partition, self._fetch_offset, 1)
            self._request_d = self.client.send_offset_request(
                [offset_request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        elif self._fetch_offset == OFFSET_COMMITTED:
            # We need to fetch the committed offset for our topic/partition
            # Note we use the same callbacks, as the responses are "close
            # enough" for our needs here
            if not self.consumer_group:
                # consumer_group must be set for OFFSET_COMMITTED
                failure = Failure(
                    InvalidConsumerGroupError("Bad Group_id:{0!r}".format(
                        self.consumer_group)))
                self._start_d.errback(failure)
                return
            request = OffsetFetchRequest(self.topic, self.partition)
            self._request_d = self.client.send_offset_fetch_request(
                self.consumer_group, [request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        else:
            # Create fetch request payload for our partition
            request = FetchRequest(
                self.topic, self.partition, self._fetch_offset,
                self.buffer_size)
            # Send request and add handlers for the response
            self._request_d = self.client.send_fetch_request(
                [request], max_wait_time=self.fetch_max_wait_time,
                min_bytes=self.fetch_min_bytes)
            # We need a temp for this because if the response is already
            # available, _handle_fetch_response() will clear self._request_d
            d = self._request_d
            d.addCallback(self._handle_fetch_response)
            d.addErrback(self._handle_fetch_error)

    def _commit_timer_failed(self, fail):
        """Handle an error in the commit() function

        Our commit() function called by the LoopingCall failed.  Some error
        probably came back from Kafka and check_error() raised the exception.
        For now, just log the failure and restart the loop.
        """
        log.warning(
            '_commit_timer_failed: uncaught error %r: %s in _auto_commit',
            fail, fail.getBriefTraceback())
        self._commit_looper_d = self._commit_looper.start(
            self.auto_commit_every_s, now=False)

    def _commit_timer_stopped(self, lCall):
        """We're shutting down, clean up our looping call..."""
        if self._commit_looper is not lCall:
            log.warning('_commit_timer_stopped with wrong timer:%s not:%s',
                        lCall, self._commit_looper)
        else:
            log.debug('_commit_timer_stopped: %s %s', lCall,
                      self._commit_looper)
            self._commit_looper = None
            self._commit_looper_d = None
Nothing we can do but # create a Failure and errback() our start() deferred log.error("Max fetch size %d too small", self.max_buffer_size) failure = Failure( ConsumerFetchSizeTooSmall( "Max buffer size:%d too small for message", self.max_buffer_size)) self._start_d.errback(failure) return log.debug( "Next message larger than fetch size, increasing " "to %d (~2x) and retrying", self.buffer_size) finally: # If we were able to extract any messages, deliver them to the # processor now. if messages: self._msg_block_d = Deferred() self._process_messages(messages) # start another fetch, if needed, but use callLater to avoid recursion self._retry_fetch(0) def _process_messages(self, messages): """Send messages to the `processor` callback to be processed In the case we have a commit policy, we send messages to the processor in blocks no bigger than auto_commit_every_n (if set). Otherwise, we send the entire message block to be processed. """ # Have we been told to shutdown? if self._shuttingdown: return # Do we have any messages to process? if not messages: # No, we're done with this block. If we had another fetch result # waiting, this callback will trigger the processing thereof. if self._msg_block_d: _msg_block_d, self._msg_block_d = self._msg_block_d, None _msg_block_d.callback(True) return # Yes, we've got some messages to process. # Default to processing the entire block... proc_block_size = sys.maxsize # Unless our auto commit_policy restricts us to process less if self.auto_commit_every_n: proc_block_size = self.auto_commit_every_n # Divide messages into two lists: one to process now, and remainder msgs_to_proc = messages[:proc_block_size] msgs_remainder = messages[proc_block_size:] # Call our processor callable and handle the possibility it returned # a deferred... last_offset = msgs_to_proc[-1].offset self._processor_d = d = maybeDeferred(self.processor, self, msgs_to_proc) log.debug('self.processor return: %r, last_offset: %r', d, last_offset) # Once the processor completes, clear our _processor_d d.addBoth(self._clear_processor_deferred) # Record the offset of the last processed message and check autocommit d.addCallback(self._update_processed_offset, last_offset) # If we were stopped, cancel the processor deferred. Note, we have to # do this here, in addition to in stop() because the processor func # itself could have called stop(), and then when it returned, we re-set # self._processor_d to the return of maybeDeferred(). if self._stopping or self._start_d is None: d.cancel() else: # Setup to process the rest of our messages d.addCallback(lambda _: self._process_messages(msgs_remainder)) # Add an error handler d.addErrback(self._handle_processor_error) def _do_fetch(self): """Send a fetch request if there isn't a request outstanding Sends a fetch request to the Kafka cluster to get messages at the current offset. When the response comes back, if there are messages, it delivers them to the :attr:`processor` callback and initiates another fetch request. If there is a recoverable error, the fetch is retried after :attr:`retry_delay`. In the case of an unrecoverable error, :func:`errback` is called on the :class:`Deferred` returned by :meth:`start()`. """ # Check for outstanding request. if self._request_d: log.debug("_do_fetch: Outstanding request: %r", self._request_d) return # Cleanup our _retry_call, if we have one if self._retry_call is not None: if self._retry_call.active(): self._retry_call.cancel() self._retry_call = None # Do we know our offset yet, or do we need to figure it out? 
        if (self._fetch_offset == OFFSET_EARLIEST or
                self._fetch_offset == OFFSET_LATEST):
            # We need to fetch the offset for our topic/partition
            offset_request = OffsetRequest(
                self.topic, self.partition, self._fetch_offset, 1)
            self._request_d = self.client.send_offset_request([offset_request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        elif self._fetch_offset == OFFSET_COMMITTED:
            # We need to fetch the committed offset for our topic/partition
            # Note we use the same callbacks, as the responses are "close
            # enough" for our needs here
            if not self.consumer_group:
                # consumer_group must be set for OFFSET_COMMITTED; fail the
                # start() deferred and don't send a request we know is invalid
                failure = Failure(
                    InvalidConsumerGroupError("Bad Group_id:{0!r}".format(
                        self.consumer_group)))
                self._start_d.errback(failure)
                return
            request = OffsetFetchRequest(self.topic, self.partition)
            self._request_d = self.client.send_offset_fetch_request(
                self.consumer_group, [request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        else:
            # Create fetch request payload for our partition
            request = FetchRequest(
                self.topic, self.partition, self._fetch_offset,
                self.buffer_size)
            # Send request and add handlers for the response
            self._request_d = self.client.send_fetch_request(
                [request], max_wait_time=self.fetch_max_wait_time,
                min_bytes=self.fetch_min_bytes)
            # We need a temp for this because if the response is already
            # available, _handle_fetch_response() will clear self._request_d
            d = self._request_d
            d.addCallback(self._handle_fetch_response)
            d.addErrback(self._handle_fetch_error)

    def _commit_timer_failed(self, fail):
        """Handle an error in the commit() function

        Our commit() function called by the LoopingCall failed. Some error
        probably came back from Kafka and _check_error() raised the
        exception. For now, just log the failure and restart the loop.
        """
        log.warning(
            '_commit_timer_failed: uncaught error %r: %s in _auto_commit',
            fail, fail.getBriefTraceback())
        self._commit_looper_d = self._commit_looper.start(
            self.auto_commit_every_s, now=False)

    def _commit_timer_stopped(self, lCall):
        """We're shutting down, clean up our looping call..."""
        if self._commit_looper is not lCall:
            log.warning('_commit_timer_stopped with wrong timer:%s not:%s',
                        lCall, self._commit_looper)
        else:
            log.debug('_commit_timer_stopped: %s %s',
                      lCall, self._commit_looper)
            self._commit_looper = None
            self._commit_looper_d = None
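# A minimal usage sketch of the public API that drives the private machinery
# above (the broker address, topic name, and processor body are illustrative
# assumptions, not values from this codebase):
from twisted.internet.defer import inlineCallbacks as _inlineCallbacks


def _show_batch(consumer, messages):
    # Called with lists of SourcedMessage; returning a Deferred here would
    # apply backpressure before the next batch is delivered.
    for m in messages:
        print(m.offset, m.message.value)


@_inlineCallbacks
def _example_consume():
    # KafkaClient, Consumer, and OFFSET_EARLIEST are afkak's public API;
    # imported locally to keep this sketch self-contained.
    from afkak import KafkaClient, Consumer
    from afkak.common import OFFSET_EARLIEST

    client = KafkaClient(hosts='localhost:9092')
    consumer = Consumer(client, b'example-topic', 0, _show_batch)
    yield consumer.start(OFFSET_EARLIEST)  # fires on stop() or a fatal error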
class NetworkService:
    """
    Prisma Network.
    """
    def __init__(self):
        self.logger = logging.getLogger('Network')
        self.status = STATUS_INIT
        self.callLater = Prisma().callLater
        self.reactor = reactor
        self.factory = NetworkFactory()
        self.listen_port = CONFIG.getint('network', 'listen_port')
        self.listener = None
        self.node_id = self.get_node_id()
        self.get_peers_lc = None
        self.get_events_lc = None
        self.get_peers_timer = CONFIG.getint('network', 'get_peers_timer')
        self.get_events_timer = CONFIG.getint('network', 'get_events_timer')
        self.timeout = CONFIG.getint('network', 'timeout')

    def start(self):
        """
        Start the service: listen on the configured port, bootstrap from the
        configured nodes, and start the get_peers/get_events polling loops.
        """
        # listen to the port
        self.listener = self.reactor.listenTCP(self.listen_port, self.factory)
        # bootstrap
        self.bootstrap()
        # start get_peers looping call
        self.get_peers_lc = LoopingCall(
            lambda: self.get_peers_from_random_peer())
        self.get_peers_lc.start(self.get_peers_timer)
        # start get_events looping call
        self.get_events_lc = LoopingCall(
            lambda: self.get_events_from_random_peer())
        self.get_events_lc.start(self.get_events_timer)

    def stop(self):
        """
        Close connections.
        """
        try:
            if self.get_peers_lc is not None:
                self.get_peers_lc.stop()
            if self.get_events_lc is not None:
                self.get_events_lc.stop()
            self.listener.stopListening()
        except Exception as e:
            self.logger.critical('Error while stopping Prisma network: ' + str(e))

    def bootstrap(self):
        """
        Delete all peers, then bootstrap from the peers in the config file by
        connecting and asking them for the peers they know.
        """
        self.status = STATUS_BOOTSTRAPPING
        # delete first all peers
        Prisma().db.delete_peers()
        # bootstrap each of the nodes
        bootstrap_nodes = json.loads(CONFIG.get('bootstrap', 'bootstrap_nodes'))
        for bootstrap in bootstrap_nodes:
            host, port = bootstrap.split(":")
            self.bootstrap_peer(host, int(port))
        # get state from a random node
        self.download_state_from_random_peer()

    def bootstrap_peer(self, host, port):
        """
        Connect to the given node and call send_get_peers(); on success its
        peers are added to the available peers.

        :param host: host to connect to
        :param port: port to connect to
        """
        self.logger.info('Bootstrapping peer {0}:{1}'.format(host, str(port)))
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_peers and call send_get_peers
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_peers_ok(_):
                self.logger.info('Successfully got peers from {0}:{1}'.format(
                    host, port))
            protocol.d.addCallback(get_peers_ok)

            def get_peers_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting peers from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
            protocol.d.addErrback(get_peers_error)

            protocol.send_get_peers()
        d.addCallback(connection_ok)

        # in case of a connection error, just log it
        def connection_error(reason):
            self.logger.warning(
                'Error while connecting to {0}:{1}: {2}'.format(
                    host, port, reason.getErrorMessage()))
        d.addErrback(connection_error)

    def download_state_from_random_peer(self):
        """
        This will get the state of a random peer.
        """
        random_peer = Prisma().db.get_random_peer()
        # if list is empty
        if not random_peer:
            self.logger.info(
                'No peers to connect to. Wait for some peer to connect to you, '
                'or restart with a peer.')
            self.callLater(2, lambda: self.download_state_from_random_peer())
            return
        random_peer = random_peer.pop()
        host = random_peer['host']
        port = random_peer['port']
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_state
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_state_ok(_):
                self.logger.info(
                    'Successfully got the state from {0}:{1}'.format(
                        host, port))
                self.status = STATUS_READY
            protocol.d.addCallback(get_state_ok)

            def get_state_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting state from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
                self.callLater(0, lambda: self.download_state_from_random_peer())
            protocol.d.addErrback(get_state_error)

            protocol.send_get_state()
        d.addCallback(connection_ok)

        # in case of connection error show in debug and try again
        def connection_error(reason):
            # in case of error remove the peer from the database
            addr = random_peer['host'] + ':' + str(random_peer['port'])
            self.logger.debug('Error while connecting to {0}: {1}'.format(
                addr, reason.getErrorMessage()))
            Prisma().db.delete_peer(random_peer['_id'])
            # then call later again
            self.callLater(0, lambda: self.download_state_from_random_peer())
        d.addErrback(connection_error)

    def get_peers_from_random_peer(self):
        """
        Gets a random peer from the database, connects to it, and asks for peers.
        """
        # get a random peer from database
        random_peer = Prisma().db.get_random_peer()
        # if list is empty
        if not random_peer:
            self.logger.info(
                'No peers to connect to. Wait for some peer to connect to you, '
                'or restart with a peer.')
            return
        random_peer = random_peer.pop()
        host = random_peer['host']
        port = random_peer['port']
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_peers and call send_get_peers
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_peers_ok(_):
                self.logger.info('Successfully got peers from {0}:{1}'.format(
                    host, port))
            protocol.d.addCallback(get_peers_ok)

            def get_peers_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting peers from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
                self.get_peers_lc.reset()
                self.get_peers_from_random_peer()
            protocol.d.addErrback(get_peers_error)

            protocol.send_get_peers()
        d.addCallback(connection_ok)

        # in case of connection error show in debug and try again
        def connection_error(reason):
            # in case of error remove the peer from the database
            addr = random_peer['host'] + ':' + str(random_peer['port'])
            self.logger.debug('Error while connecting to {0}: {1}'.format(
                addr, reason.getErrorMessage()))
            Prisma().db.delete_peer(random_peer['_id'])
            # then restart timer and try again get_peers_from_random_peer
            self.get_peers_lc.reset()
            self.get_peers_from_random_peer()
        d.addErrback(connection_error)

    def get_events_from_random_peer(self):
        """
        Gets a random peer from the database, connects to it, and asks for events.
""" # check first if ready if self.status != STATUS_READY: self.logger.info('Not ready, still bootstrapping.') return # check that we have enough peers peer_count = Prisma().db.count_peers() if peer_count < 3: if not CONFIG.getboolean('developer', 'developer_mode') or peer_count < 1: self.logger.debug( 'Not enough peers found in the network, skipping...') return random_peer = Prisma().db.get_random_peer().pop() host = random_peer['host'] port = random_peer['port'] client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout) d = client.connect(self.factory) # in case of connection ok, add the callbacks for get_peers and call send_get_peers def connection_ok(protocol): protocol.d = defer.Deferred() def get_events_ok(_): self.logger.info('Successfully got events from {0}:{1}'.format( host, port)) protocol.d.addCallback(get_events_ok) def get_events_error(reason): error_message = reason.getErrorMessage() self.logger.error( 'Error when getting events from {0}:{1}: {2}'.format( host, port, error_message)) protocol.close_connection() self.get_events_lc.reset() self.get_events_from_random_peer() protocol.d.addErrback(get_events_error) protocol.send_get_events() d.addCallback(connection_ok) # in case of connection error show in debug and try again def connection_error(reason): # in case of error remove the peer from the database addr = random_peer['host'] + ':' + str(random_peer['port']) self.logger.debug('Error while connecting to {0}: {1}'.format( addr, reason.getErrorMessage())) Prisma().db.delete_peer(random_peer['_id']) # then restart timer and try again get_peers_from_random_peer self.get_events_lc.reset() self.get_events_from_random_peer() d.addErrback(connection_error) @staticmethod def get_node_id(): """ Generate a node id based on public key. :return: string """ crypto = Crypto() pub_key_sha = crypto.sha256(str(Prisma().graph.keystore['publicKey'])) node_id = pub_key_sha[:10] return hexlify(node_id).decode('utf-8')
class Coordinator(object): """ Private group coordinator implementation. You almost certainly want to use :class:`afkak.ConsumerGroup` instead. :param str group_id: Name of the consumer group to join for dynamic partition assignment, and to use for fetching and committing offsets. :param topics: Names of topics to consume. At least one topic must be given. :type topics: List[str] :param float session_timeout_ms: The timeout used to detect failures when using Kafka's group management facilities. Default: 30000 :param float heartbeat_interval_ms: The expected time in milliseconds between heartbeats to the consumer coordinator when using Kafka's group management feature. Heartbeats are used to ensure that the consumer's session stays active and to facilitate rebalancing when new consumers join or leave the group. The value must be set lower than session_timeout_ms, but typically should be set no higher than 1/3 of that value. It can be adjusted even lower to control the expected time for normal rebalances. Default: 5000 :param float initial_backoff_ms: Milliseconds to backoff between attempts to find the group coordinator broker at startup. Default: 1000 :param float retry_backoff_ms: Milliseconds to backoff when retrying from expected errors and group membership changes. Default: 100. :param float fatal_backoff_ms: Milliseconds to backoff when retrying on unexpected kafka errors Default: 10000. """ def __init__(self, client, group_id, topics, session_timeout_ms=30000, heartbeat_interval_ms=5000, initial_backoff_ms=1000, retry_backoff_ms=100, fatal_backoff_ms=10000): self.client = client self.group_id = group_id self.topics = topics if not topics: raise ValueError('topics must be non-empty') self.session_timeout_ms = session_timeout_ms self.heartbeat_interval_ms = heartbeat_interval_ms self.initial_backoff_ms = initial_backoff_ms self.retry_backoff_ms = retry_backoff_ms self.fatal_backoff_ms = fatal_backoff_ms self.member_id = "" self.leader_id = None self.generation_id = None self.coordinator_broker = None # BrokerMetadata of the coordinator self.protocol = _ConsumerProtocol() self._start_d = None self._state = '[initialized]' # Keep track of state for debugging # do we need to rejoin? self._rejoin_needed = True # are we shutting down? 
self._stopping = False # delayedcall for a pending rejoin self._rejoin_wait_dc = None # deferred for a rejoin in progress self._rejoin_d = False # loopingcall for the heartbeat timer self._heartbeat_looper = LoopingCall(self._heartbeat) self._heartbeat_looper.clock = self.client.reactor self._heartbeat_looper_d = None self._heartbeat_request_d = None def __repr__(self): return '<{}.{} group={} member={} topics={} {}>'.format( __name__, self.__class__.__name__, self.group_id, self.member_id, self.topics, self._state, ) def get_coordinator_broker(self): def _get_coordinator_failed(result): retry_delay = self.initial_backoff_ms if result.check(CoordinatorNotAvailable, NotCoordinatorForConsumerError, RequestTimedOutError): # while kafka is starting up, it may take some time before it # successfully chooses an authoritative broker for the group log.debug("%s could not get coordinator broker: %s", self, result.value) if result.check(RequestTimedOutError): retry_delay = self.fatal_backoff_ms elif result.check(KafkaError): log.warn("%s could not get coordinator broker: %s", self, result.value) retry_delay = self.fatal_backoff_ms else: return result self.client.reactor.callLater( retry_delay / 1000.0, self.join_and_sync, ) return def _get_coordinator_success(leader): if not leader: self.client.reactor.callLater( self.initial_backoff_ms / 1000.0, self.join_and_sync, ) return # do a topic metadata load, because if we are elected leader # we will need to know the partition info for all our topics # to assign them correctly metadata_d = self.client.load_metadata_for_topics(*self.topics) metadata_d.addCallback(lambda result: self) return metadata_d d = self.client._get_coordinator_for_group(self.group_id) d.addCallbacks(_get_coordinator_success, _get_coordinator_failed) return d def send_join_group_request(self): payload = _JoinGroupRequest( group=self.group_id, session_timeout=self.session_timeout_ms, member_id=self.member_id, protocol_type=self.protocol.protocol_type, group_protocols=self.protocol.join_group_protocols(self.topics), ) def _join_group_success(response): self.member_id = response.member_id self.generation_id = response.generation_id self.leader_id = response.leader_id return response de = self.client._send_request_to_coordinator( self.group_id, payload, encoder_fn=KafkaCodec.encode_join_group_request, decode_fn=KafkaCodec.decode_join_group_response, # join_group requests can take up to 30s as the group restabilizes # override client.timeout to allow for that plus some extra min_timeout=35.0, ) de.addCallbacks( _join_group_success, self.rejoin_after_error, errbackKeywords=dict(label="join_group"), ) return de def send_sync_group_request(self, group_assignment): # Make the sync group request payload = _SyncGroupRequest( group=self.group_id, generation_id=self.generation_id, member_id=self.member_id, group_assignment=group_assignment, ) de = self.client._send_request_to_coordinator( group=self.group_id, payload=payload, encoder_fn=KafkaCodec.encode_sync_group_request, decode_fn=KafkaCodec.decode_sync_group_response, ) de.addErrback(self.rejoin_after_error, label="sync_group") return de def send_leave_group_request(self): # Make the leave group request payload = _LeaveGroupRequest( group=self.group_id, member_id=self.member_id, ) def _leave_group_success(result): log.debug("leave_group request succeeded") self.member_id = "" self.generation_id = None de = self.client._send_request_to_coordinator( group=self.group_id, payload=payload, encoder_fn=KafkaCodec.encode_leave_group_request, 
decode_fn=KafkaCodec.decode_leave_group_response, ) de.addCallback(_leave_group_success) return de def send_heartbeat_request(self): # Make the heartbeat request payload = _HeartbeatRequest( group=self.group_id, generation_id=self.generation_id, member_id=self.member_id, ) request_d = self.client._send_request_to_coordinator( group=self.group_id, payload=payload, encoder_fn=KafkaCodec.encode_heartbeat_request, decode_fn=KafkaCodec.decode_heartbeat_response, ) return request_d def start(self): if self._start_d: raise RestartError("Start called on already-started coordinator") log.debug("starting") self._start_d = Deferred() self.join_and_sync() return self._start_d def reset_heartbeat_timer(self): if self._heartbeat_looper.running: self._heartbeat_looper.reset() else: self._heartbeat_looper_d = self._heartbeat_looper.start( self.heartbeat_interval_ms / 1000.0, now=False, ) self._heartbeat_looper_d.addErrback(self._heartbeat_timer_failed) self._heartbeat_looper_d.addBoth(self._heartbeat_timer_stopped) @inlineCallbacks def stop(self, errback_result=None): if self._start_d is None: raise RestopError("Shutdown called on non-running coordinator") if self._stopping: raise RestopError("Shutdown called more than once.") log.info("%s stopping with %s", self, errback_result) self._state = '[stopping]' self._stopping = True self._rejoin_needed = False if self._rejoin_wait_dc: self._rejoin_wait_dc.cancel() if self._heartbeat_request_d: self._heartbeat_request_d.cancel() if self._heartbeat_looper: if self._heartbeat_looper.running: self._heartbeat_looper.stop() self._state = '[leaving]' if self.coordinator_broker is not None and self.member_id: try: yield self.send_leave_group_request() except Exception: log.exception("error sending leave group request") if self._rejoin_d: self._rejoin_d, d = None, self._rejoin_d d.cancel() self._state = '[stopped]' self.protocol = None self.member_id = "" self.generation_id = None self.coordinator_broker = None self._start_d, d = None, self._start_d if not d.called: if errback_result: d.errback(errback_result) else: d.callback(self) def _heartbeat(self): if self._stopping: return if self._rejoin_needed: log.debug("%s: skipping heartbeat, rejoin needed", self) return if self._heartbeat_request_d: log.debug("%s: skipping heartbeat, in progress", self) return log.debug("%s: heartbeat", self) self._heartbeat_request_d = self.send_heartbeat_request() self._heartbeat_request_d.addCallbacks( self._handle_heartbeat_success, self._handle_heartbeat_failure, ) def _handle_heartbeat_success(self, result): self._heartbeat_request_d = None log.debug("%s: heartbeat success", self) return result def _handle_heartbeat_failure(self, failure): self._heartbeat_request_d = None self._heartbeat_looper.stop() return self.rejoin_after_error(failure, label="heartbeat") def _heartbeat_timer_failed(self, failure): """ Called when the heartbeat LoopingCall terminates with an unhandled exception. This represents a bug. """ log.error("%s Unhandled exception in heartbeat loop", self, exc_info=(failure.type, failure.value, failure.getTracebackObject())) # TODO: This should handle the failure by leaving the group. 
return failure def _heartbeat_timer_stopped(self, result): self._heartbeat_looper_d = None return result def rejoin_after_error(self, result, label="rejoin_after_error"): rejoin_delay = self.retry_backoff_ms if result.check(RebalanceInProgress): log.debug("%s %s: group rebalance needed, rejoining", self, label) elif result.check(CoordinatorNotAvailable, NotCoordinatorForConsumerError): log.info("%s %s: group coordinator is invalid, rejoining", self, label) self.client.reset_consumer_group_metadata(self.group_id) elif result.check(IllegalGeneration): # we have been rejected - our consumers should not remain open log.info("%s %s: generation id %s is not current, rejoining", self, label, self.generation_id) self.on_group_leave() elif result.check(InvalidGroupId, UnknownMemberId): # we have been rejected - our consumers should not remain open log.info("%s %s: member id is not valid, rejoining", self, label) self.on_group_leave() self.member_id = "" elif result.check(InconsistentGroupProtocol): # this error can only happen if there's already a consumer group on # the topic and it doesn't support the protocols we've declared. # retrying is unlikely to help, so wait longer between attempts log.error("%s %s: group protocol was rejected, delaying rejoin", self, label) rejoin_delay = self.fatal_backoff_ms elif result.check(RequestTimedOutError): # If the request timed out, use the long delay before trying again log.warn("%s %s: request timed out, delaying rejoin", self, label) self.on_group_leave() self.client.reset_consumer_group_metadata(self.group_id) rejoin_delay = self.fatal_backoff_ms elif self._stopping and result.check(CancelledError): # Not really an error return elif result.check(KafkaError): # no other kafka errors are expected from the requests we're # sending out, so to get here means something is wrong. # retrying probably won't help, so wait longer between attempts log.error("%s %s: unexpected response error: %r, delaying rejoin", self, label, result) rejoin_delay = self.fatal_backoff_ms else: # if it wasn't a kafka error, we're not going to try to rejoin log.error("%s %s: unknown error %s", self, label, result) self.on_group_leave() self.stop(errback_result=result) return self._state = '[rejoin_needed]' self._rejoin_needed = True if not self._rejoin_wait_dc: rejoin_delay_s = rejoin_delay / 1000.0 log.debug("%s %s: scheduling rejoin in %.1fs", self, label, rejoin_delay_s) self._rejoin_wait_dc = self.client.reactor.callLater( rejoin_delay_s, self.join_and_sync) def join_and_sync(self): """ Called to join (or rejoin) the consumer group. Failures will result in join_and_sync being rescheduled to run again after a short delay, until it succeeds. Failed heartbeats will also reschedule join_and_sync """ if self._rejoin_wait_dc: self._rejoin_wait_dc = None if not self._rejoin_needed: log.debug("join_and_sync: rejoin not needed") return # prevent multiple concurrent request situations if self._rejoin_d: # XXX: This should throw, not silently ignore. 
log.debug("join_and_sync: rejoin in progress") return def cleanup_rejoin_d(result): self._rejoin_d = None return result def rejoin_d_errback(result): log.error("%s error during join_and_sync: %s", self, result) self._rejoin_d = d = self._join_and_sync() d.addBoth(cleanup_rejoin_d).addErrback(rejoin_d_errback) return d @inlineCallbacks def _join_and_sync(self): self._state = '[fetching_broker]' coordinator_broker = yield self.get_coordinator_broker() if not coordinator_broker or self._stopping: return self.coordinator_broker = coordinator_broker self._state = '[joining]' yield self.on_join_prepare() join_response = yield self.send_join_group_request() if not join_response or self._stopping: # join failed, we'll be called again after a small delay return log.debug("%s joined, leader=%s", self, join_response.leader_id == join_response.member_id) assignments = [] if join_response.leader_id == join_response.member_id: try: assignments = yield self.protocol.generate_assignments( join_response.members, topic_partitions={}, ) except _NeedTopicPartitions as e: topic_partitions = yield self.client._load_topic_partitions( *e.topics) assignments = yield self.protocol.generate_assignments( join_response.members, topic_partitions=topic_partitions, ) self._state = '[syncing]' sync_response = yield self.send_sync_group_request(assignments) if not sync_response or self._stopping: # sync failed, we'll be called again after a small delay return # sync success - update our assignments # and restart the heartbeat timer assignment = self.protocol.decode_assignment( sync_response.member_assignment) self.reset_heartbeat_timer() self._rejoin_needed = False self._state = '[joined]' yield self.on_join_complete(assignment) def on_group_leave(self): """ Called when the coordinator has been removed from the group (such as from a heartbeat error) and will not be rejoining """ log.info("%s: on_group_leave", self) def on_join_prepare(self): """ Called before joining or rejoining the group """ log.info("%s: on_join_prepare", self) def on_join_complete(self, assignments): """ Called after a successful group sync """ log.info("%s: on_join_complete assignments=%r", self, assignments)
class TelldusIn(Protocol): ''' Class for incoming Telldus events ''' noisy = False path = '/tmp/TelldusEvents' idleTimeout = 60 def __init__(self, parent): self.log = Logger(namespace=parent.name+'/in') self.parent = parent self.master = parent.master self.status = ColorState(log=self.log, state_format={0:0}) # <-- a hack to avoid color self.status.add_callback(self.parent.update_status) self.connected = False self.factory = TelldusInFactory(self, self.parent) def connect(self): self.status.set_OFF() self.master.reactor.connectUNIX(self.path, self.factory) def connectionMade(self): self.log.info("Connected to {p}", p=self.path) self.connected = True self.status.set_YELLOW('IN connection made, waiting for data') self.data = '' self.elements = [] self.timer = LoopingCall(self.dataTimeout) self.timer.start(self.idleTimeout, now=False) def connectionLost(self, reason): # pylint: disable=W0222 self.connected = False self.log.info("Lost connection with {p}: {e}", p=self.path, e=reason.getErrorMessage()) self.status.set_OFF('IN connection closed') if self.timer.running: self.timer.stop() def disconnect(self): if self.connected: self.transport.loseConnection() def dataReceived(self, data): self.log.debug('{_rawin}', rawin=data) if self.timer.running: self.timer.reset() else: self.timer.start(self.idleTimeout, now=False) data = self.data + data # Interpret the data (elements, data) = parsestream(data) (events, elements) = parseelements(elements, log=self.log) # Save remaining data for next call (incomplete frame received) self.data = data self.elements = elements # At this point, we can consider the connection up self.status.set_GREEN() # Iterate over the received events for event in events: self.log.debug('{_datain}', datain=event) self.parent.parse_event(event) def dataTimeout(self): self.timer.stop() self.log.info("No telldus activity. No connection?") self.status.set_YELLOW('No activity')
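# dataReceived()/dataTimeout() above implement an idle watchdog: a
# LoopingCall that is reset on every frame and only fires after idleTimeout
# seconds of silence. The same idiom in isolation (`on_idle` is a stand-in
# callback):
from twisted.internet.task import LoopingCall


class _IdleWatchdog(object):
    def __init__(self, timeout, on_idle):
        self.timeout = timeout
        self.timer = LoopingCall(on_idle)

    def activity(self):
        # Start lazily on the first activity; afterwards each call pushes
        # the next firing back by `timeout` seconds.
        if self.timer.running:
            self.timer.reset()
        else:
            self.timer.start(self.timeout, now=False)

    def stop(self):
        if self.timer.running:
            self.timer.stop()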
class IRCProtocol(irc.IRC): UNREGISTERED_COMMANDS = ["PASS", "USER", "NICK", "PING", "PONG", "QUIT", "CAP"] def __init__(self, *args, **kwargs): self.dead = False self.type = None self.password = None self.nick = None self.user = None self.secure = False self.data = 0 self.data_checker = LoopingCall(self.checkData) self.pinger = LoopingCall(self.ping) self.last_message = None def connectionMade(self): self.secure = ISSLTransport(self.transport, None) is not None self.data_checker.start(5) self.last_message = now() self.pinger.start(self.factory.client_ping_interval) ip = self.transport.getPeer().host expired = [] for mask, linedata in self.factory.xlines["Z"].iteritems(): if linedata["duration"] != 0 and epoch(now()) > epoch(linedata["created"]) + linedata["duration"]: expired.append(mask) continue if fnmatch.fnmatch(ip, mask): self.sendMessage( "NOTICE", "*", ":{}".format(self.factory.client_ban_msg), prefix=self.factory.server_name ) self.sendMessage("ERROR", ":Closing Link {} [Z:Lined: {}]".format(ip, linedata["reason"])) self.transport.loseConnection() for mask in expired: del self.factory.xlines["Z"][mask] if expired: self.factory.save_options() def dataReceived(self, data): if self.dead: return # self.factory.stats_data["bytes_in"] += len(data) # self.factory.stats_data["total_bytes_in"] += len(data) self.data += len(data) self.last_message = now() if self.pinger.running: self.pinger.reset() irc.IRC.dataReceived(self, data) def checkData(self): if self.type: self.type.checkData(self.data) self.data = 0 def ping(self): if (now() - self.last_message).total_seconds() > self.factory.client_timeout_delay: self.transport.loseConnection() self.connectionLost(None) else: self.sendMessage("PING", ":{}".format(self.factory.server_name)) def handleCommand(self, command, prefix, params): # self.factory.stats_data["lines_in"] += 1 # self.factory.stats_data["total_lines_in"] += 1 log.msg("handleCommand: {!r} {!r} {!r}".format(command, prefix, params)) if not self.type and command not in self.UNREGISTERED_COMMANDS: return self.sendMessage( irc.ERR_NOTREGISTERED, command, ":You have not registered", prefix=self.factory.server_name ) elif not self.type: return irc.IRC.handleCommand(self, command, prefix, params) else: return self.type.handleCommand(command, prefix, params) def sendLine(self, line): if self.dead: return # self.factory.stats_data["lines_out"] += 1 # self.factory.stats_data["total_lines_out"] += 1 # self.factory.stats_data["bytes_out"] += len(line)+2 # self.factory.stats_data["total_bytes_out"] += len(line)+2 log.msg("sendLine: {!r}".format(line)) return irc.IRC.sendLine(self, line) def irc_PASS(self, prefix, params): if not params: return self.sendMessage( irc.ERR_NEEDMOREPARAMS, "PASS", ":Not enough parameters", prefix=self.factory.server_name ) self.password = params[0] def irc_NICK(self, prefix, params): if not params: self.sendMessage(irc.ERR_NONICKNAMEGIVEN, ":No nickname given", prefix=self.factory.server_name) elif not VALID_USERNAME.match(params[0]): self.sendMessage( irc.ERR_ERRONEUSNICKNAME, params[0], ":Erroneous nickname", prefix=self.factory.server_name ) elif params[0] in self.factory.users: self.sendMessage( irc.ERR_NICKNAMEINUSE, self.factory.users[params[0]].nickname, ":Nickname is already in use", prefix=self.factory.server_name, ) else: lower_nick = irc_lower(params[0]) expired = [] for mask, linedata in self.factory.xlines["Q"].iteritems(): if linedata["duration"] != 0 and epoch(now()) > epoch(linedata["created"]) + linedata["duration"]: expired.append(mask) 
continue if fnmatch.fnmatch(lower_nick, mask): self.sendMessage( irc.ERR_ERRONEUSNICKNAME, self.nick if self.nick else "*", params[0], ":Invalid nickname: {}".format(linedata["reason"]), prefix=self.factory.server_name, ) return for mask in expired: del self.factory.xlines["Q"][mask] if expired: self.factory.save_options() self.nick = params[0] if self.user: try: self.type = self.factory.types["user"](self, self.user, self.password, self.nick) except ValueError: self.type = None self.transport.loseConnection() def irc_USER(self, prefix, params): if len(params) < 4: return self.sendMessage( irc.ERR_NEEDMOREPARAMS, "USER", ":Not enough parameters", prefix=self.factory.server_name ) self.user = params if self.nick: try: self.type = self.factory.types["user"](self, self.user, self.password, self.nick) except ValueError: self.type = None self.transport.loseConnection() def irc_SERVICE(self, prefix, params): try: self.type = self.factory.types["service"](self, params, self.password) except ValueError: self.type = None self.transport.loseConnection() def irc_SERVER(self, prefix, params): try: self.type = self.factory.types["server"](self, params, self.password) except ValueError: self.type = None self.transport.loseConnection() def irc_PING(self, prefix, params): if params: self.sendMessage("PONG", self.factory.server_name, ":{}".format(params[0]), prefix=self.factory.server_name) else: # TODO: There's no nickname here, what do? self.sendMessage(irc.ERR_NOORIGIN, "CHANGE_THIS", ":No origin specified", prefix=self.factory.server_name) def irc_PONG(self, prefix, params): pass def irc_CAP(self, prefix, params): pass def irc_QUIT(self, prefix, params): self.transport.loseConnection() def connectionLost(self, reason): if self.dead: return self.dead = True self.factory.unregisterProtocol(self) if self.type: self.type.connectionLost(reason) if self.data_checker.running: self.data_checker.stop() if self.pinger.running: self.pinger.stop()
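# The Z-line and Q-line scans in connectionMade() and irc_NICK() share the
# same logic: drop entries whose duration has lapsed, then fnmatch the
# subject (IP or nick) against each mask. A sketch of that check factored
# out -- `xlines` maps mask -> {"created", "duration", "reason"}, and
# epoch()/now() are assumed to be this module's existing helpers:
import fnmatch


def _match_xline(subject, xlines):
    """Return (reason, expired_masks); reason is None if nothing matched."""
    expired = []
    for mask, linedata in xlines.items():
        if linedata["duration"] != 0 and epoch(now()) > epoch(linedata["created"]) + linedata["duration"]:
            expired.append(mask)
            continue
        if fnmatch.fnmatch(subject, mask):
            # First unexpired match wins, mirroring the inline scans above.
            return linedata["reason"], expired
    return None, expired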
class Startup(YomboLibrary):
    """
    Start-up checks

    Checks to make sure basic configurations are valid and other start-up
    operations.
    """
    def __str__(self):
        """
        Returns the name of the library.

        :return: Name of the library
        :rtype: string
        """
        return "Yombo startup library"

    def _init_(self, **kwargs):
        first_run = self._Configs.get('core', 'first_run', False, False)
        if self._Loader.operating_mode == 'first_run':
            # will know if first_run already or yombo.ini is missing.
            return
        if first_run is True:
            self._Loader.operating_mode = 'first_run'
            return True

        items_needed = []
        gwid = self._Configs.get('core', 'gwid', 'local', False)
        if gwid is None or gwid == "":
            items_needed.append("Gateway ID")
        gwuuid = self._Configs.get("core", "gwuuid", None)
        if gwuuid is None or gwuuid == "":
            items_needed.append("Gateway private UUID")
        gwhash = self._Configs.get("core", "gwhash", None)
        if gwhash is None or gwhash == "":
            items_needed.append("Gateway login password hash")
        is_master = self._Configs.get("core", "is_master", True)
        if is_master is False:
            master_gateway = self._Configs.get("core", "master_gateway", None, False)
            if master_gateway is None or master_gateway == "":
                items_needed.append(
                    "Gateway is marked as slave, but no master gateway is set.")
        else:
            master_gateway = self._Configs.get("core", "master_gateway", None, False)

        if len(items_needed) > 0:
            needed_text = '</li><li>'.join(items_needed)
            print("start needed_text: %s" % needed_text)
            self._Notifications.add({
                'title': 'Need configurations',
                'message': 'System has been placed into configuration mode. '
                           'Reason: The following configurations are needed:'
                           '<p><ul><li>%s</li></ul>' % needed_text,
                'source': 'Yombo Startup Library',
                'persist': False,
                'priority': 'high',
                'always_show': True,
                'always_show_allow_clear': True,
            })
            self._Loader.operating_mode = 'config'
        else:
            self._Loader.operating_mode = 'run'

        self.cache_updater_running = False
        self.system_stopping = False

    def _start_(self, **kwargs):
        """
        Handles making sure caches are regularly updated and cleaned.

        :return:
        """
        self.update_caches_loop = LoopingCall(self.update_caches)
        self.update_caches_loop.start(60 * 60 * 6, False)

    def _stop_(self, **kwargs):
        self.system_stopping = True

    @inlineCallbacks
    def update_caches(self, force=None, quick=False):
        """
        Iterates through the modules and devices to update their caches.

        :return:
        """
        if self.cache_updater_running is True and force is True:
            # Signal the running pass to stop; it resets the loop itself.
            self.cache_updater_running = None
            return

        if self._Loader.operating_mode != 'run' or self.cache_updater_running is not False or \
                self.system_stopping is True:
            return

        logger.info("Starting cache updates.")
        self.cache_updater_running = True
        # Copied from Modules library, but with sleeps and checks.
        for module_id, module in self._Modules.modules.items():
            if self.cache_updater_running is None or self.system_stopping is True:
                self.cache_updater_running = False
                self.update_caches_loop.reset()
                return
            yield self._Modules.do_update_module_cache(module)
            yield sleep(1)

        for device_id, device in self._Devices.devices.items():
            if self.cache_updater_running is None or self.system_stopping is True:
                self.cache_updater_running = False
                self.update_caches_loop.reset()
                return
            yield device.device_variables()
            yield sleep(0.5)

        self.cache_updater_running = False
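# update_caches() above treats cache_updater_running as a three-state flag:
# False (idle), True (a pass is running), None (a running pass has been
# asked to bail out, e.g. by a forced restart). The idiom in isolation --
# `work_items` and `do_work` are stand-ins for the module/device iteration:
from twisted.internet.defer import inlineCallbacks as _inlineCallbacks


class _CancellableWorker(object):
    running = False  # False=idle, True=running, None=cancel requested

    @_inlineCallbacks
    def run(self, work_items, do_work):
        if self.running is not False:
            return
        self.running = True
        for item in work_items:
            if self.running is None:  # a cancel was requested mid-pass
                break
            yield do_work(item)
        self.running = False

    def cancel(self):
        if self.running is True:
            self.running = None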
class C2SServerProtocol(InternalServerProtocol): pinger = None ping_txid = None def __init__(self, config): txprotobuf.Protocol.__init__(self, c2s) self.MAX_LENGTH = config['server']['c2s.pack_size_max'] self.idler = LoopingCall(self.onIdle) if 'reset' not in dir(self.idler): def reset(self): try: self.idler.stop() except: pass try: self.idler.start(IDLE_DELAY, False) except: pass self.idler.reset = reset def connectionMade(self): InternalServerProtocol.connectionMade(self) """ TODO this breaks compatibility with Android client < 16 info = InternalServerProtocol.connectionMade(self) # send serverinfo r = c2s.ServerInfo() r.version = info['version'] r.client_protocol = info['client-protocol'] r.fingerprint = info['fingerprint'] r.network = info['network'] r.supports.extend(info['supports']) self.sendBox(r) """ # start idler self.idler.start(IDLE_DELAY, False) def connectionLost(self, reason=connectionDone): try: # stop idler self.idler.stop() except: pass try: # stop pinger self.pinger.stop() except: pass InternalServerProtocol.connectionLost(self, reason) def onIdle(self): self.service.idle() # send ping a = c2s.Ping() a.timestamp = long(time.time()) self.ping_txid = self.sendBox(a) # ping is sent, wait for response if not self.pinger: self.pinger = reactor.callLater(PING_DELAY, self.pingTimeout) def pingTimeout(self): try: self.idler.stop() except: pass self.service.ping_timeout() self.pinger = None def boxReceived(self, data, tx_id=None): # reset idler try: self.idler.reset() except: pass # optional reply r = None name = data.__class__.__name__ if name == 'LoginRequest': r = c2s.LoginResponse() (r.status, userid) = self.service.login(str(tx_id), data.token, data.client_protocol, data.client_version, data.flags) if userid: r.user_id = userid elif name == 'AuthenticateRequest': r = c2s.AuthenticateResponse() r.valid = self.service.authenticate(str(tx_id), data.token) elif name == 'MessagePostRequest': if self.service.is_logged(): r = c2s.MessagePostResponse() if len(data.recipient) > 0: res = self.service.post_message(str(tx_id), tuple(data.recipient), str(data.mime), tuple(data.flags), data.content) for userid, msgid in res.iteritems(): me = r.entry.add() me.user_id = userid if type(msgid) == str: me.status = c2s.MessagePostResponse.MessageSent.STATUS_SUCCESS me.message_id = msgid elif msgid: me.status = msgid else: me.status = c2s.MessagePostResponse.MessageSent.STATUS_ERROR elif name == 'MessageAckRequest': if self.service.is_logged(): r = c2s.MessageAckResponse() res = self.service.ack_message(str(tx_id), tuple(data.message_id)) for _msgid in data.message_id: msgid = str(_msgid) e = r.entry.add() e.message_id = msgid try: success = res[msgid] if success: e.status = c2s.MessageAckResponse.Entry.STATUS_SUCCESS else: e.status = c2s.MessageAckResponse.Entry.STATUS_NOTFOUND except: import traceback traceback.print_exc() e.status = c2s.MessageAckResponse.Entry.STATUS_ERROR elif name == 'RegistrationRequest': if not self.service.is_logged(): r = c2s.RegistrationResponse() res = self.service.register_user(str(tx_id), str(data.username)) r.status = res['status'] if 'token' in res: r.token = res['token'] if 'sms_from' in res: r.sms_from = res['sms_from'] if 'email_from' in res: r.email_from = res['email_from'] elif name == 'ValidationRequest': if not self.service.is_logged(): r = c2s.ValidationResponse() res, token = self.service.validate_user( str(tx_id), str(data.validation_code)) r.status = res if token: r.token = token elif name == 'UserLookupRequest': if self.service.is_logged(): # lookup 
can delay operations by a long time, so defer it def lookup_complete(found, tx_id, return_value=False): r = c2s.UserLookupResponse() for u in found: e = r.entry.add() e.user_id = u['userid'] if 'status' in u: e.status = u['status'] if 'timestamp' in u: e.timestamp = u['timestamp'] if 'timediff' in u: e.timediff = u['timediff'] if return_value: return r else: self.sendBox(r, tx_id) # force generic users only found = self.service.lookup_users( tx_id, [str(x)[:utils.USERID_LENGTH] for x in data.user_id]) if isinstance(found, defer.Deferred): found.addCallback(lookup_complete, tx_id) else: r = lookup_complete(found, tx_id, True) elif name == 'ServerInfoRequest': r = c2s.ServerInfoResponse() info = self.service.serverinfo(str(tx_id), str(data.client_version), data.client_protocol) r.version = info['version'] r.client_protocol = info['client-protocol'] r.server_protocol = info['server-protocol'] r.fingerprint = info['fingerprint'] r.supports.extend(info['supports']) elif name == 'UserInfoUpdateRequest': if self.service.is_logged(): r = c2s.UserInfoUpdateResponse() flags = None if data.HasField('flags'): flags = data.flags if data.HasField('status_message'): status_msg = data.status_message.encode('utf-8') else: status_msg = None if data.HasField('google_registration_id'): google_regid = str(data.google_registration_id) else: google_regid = None r.status = self.service.user_update(flags, status_msg, google_regid) elif name == 'UserPresenceSubscribeRequest': if self.service.is_logged(): r = c2s.UserPresenceSubscribeResponse() r.status = self.service.user_presence_subscribe( str(data.user_id), data.events) elif name == 'Pong': # client has responded: check tx_id if self.ping_txid == tx_id: # reset ping timeout check try: self.pinger.cancel() except: pass self.pinger = None elif name == 'ValidationCodeRequest': if self.service.is_logged(): code = self.service.revalidate() r = c2s.ValidationCodeResponse() if type(code) == str: r.status = c2s.ValidationCodeResponse.STATUS_SUCCESS r.code = code else: r.status = c2s.ValidationCodeResponse.STATUS_ERROR if r: self.sendBox(r, tx_id) # reset idler (again) try: self.idler.reset() except: pass
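# The UserLookupRequest branch above manually special-cases lookup_users()
# returning either a plain list or a Deferred. twisted's defer.maybeDeferred
# collapses the two cases into one code path; a sketch of the same flow
# using it (`send_box` and `build_response` stand in for the protocol calls
# made above):
from twisted.internet import defer


def _handle_lookup(service, tx_id, user_ids, build_response, send_box):
    d = defer.maybeDeferred(service.lookup_users, tx_id, user_ids)
    # Whether lookup_users returned a value or a Deferred, respond the same
    # way once the result is available.
    d.addCallback(lambda found: send_box(build_response(found), tx_id))
    return d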
class AmqpConfigHandler(YomboLibrary): """ Handles interactions with Yombo servers through the AMQP library. """ def __init__(self, amqpyombo): """ Loads various variables and calls :py:meth:connect() when it's ready. :return: """ self.parent = amqpyombo self.init_startup_count = 0 self.inital_config_items_requested = 0 self.init_defer = None # Prevents loader from moving on until we are done. self.__doing_full_configs = False # will be set to True later when download configurations self.__pending_updates = { } # Holds a dict of configuration items we've asked for, but not response yet. self.__process_queue = { } # Holds a list of configuration items we've asked for, but not response yet. self.processing = False self.processing_queue = False self.db_existing_data = { } # used to keep track of items to add, update, or delete self.db_completed_ids = {} # [table][row_id] self.db_delete_ids = {} # [table][row_id] self.db_insert_data = {} # [table][row_id] = orderedDicts self.db_update_data = {} # [table][row_id] = Dictionaries self._checkProcessQueueLoop = LoopingCall(self.process_config_queue) self.check_download_done_calllater = None self._LocalDB = self.parent._LocalDB self.amqp = None # holds our pointer for out amqp connection. self._getAllConfigsLoggerLoop = None def connect_setup(self, init_defer): """ The connection was setup, but not quite ready to ask for configs. Just setup a defer. :return: """ self.init_defer = init_defer def connected(self): """ Called when fully connected. :return: """ self.get_system_configs() self.check_download_done_calllater = reactor.callLater( 15, self.check_download_done) # return self.init_defer def disconnected(self): """ Called by amqpyombo when the system is disconnected. :return: """ if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running: self._getAllConfigsLoggerLoop.stop() if self._checkProcessQueueLoop is not None and self._checkProcessQueueLoop.running: self._checkProcessQueueLoop.stop() self.__pending_updates.clear() self.__doing_full_configs = False try: self.check_download_done_calllater.cancel() except: pass def _stop_(self): """ Called by the Yombo system when it's time to shutdown. This in turn calls the disconnect. :return: """ if self.init_defer.called is False: # reactor.callLater(0.1, self.init_defer.callback, 10) # self.init_defer.callback( 1) # if we don't check for this, we can't stop! if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running: self._getAllConfigsLoggerLoop.stop() if self._checkProcessQueueLoop is not None and self._checkProcessQueueLoop.running: self._checkProcessQueueLoop.stop() try: self.check_download_done_calllater.cancel() except: pass def check_download_done(self): """ Called after 30 seconds to check if downloads have completed. If they haven't, it will just give up and move on. :return: """ # print(f"check_download_done_calllater....{self.__process_queue}") if self.__doing_full_configs is True and len( self.__process_queue) == 0: last_complete = self.parent._Configs.get("amqpyombo", "lastcomplete", None, False) if last_complete == None: if self.init_startup_count > 5: logger.error( "Unable to reach or contact server. If problem persists, check your configs. " "(Help link soon.)") self.reconnect = False reactor.stop() return logger.warn( "Try #{count}, haven't been able to download configurations." " However, there are no existing configs. 
Will keep trying.", count=self.init_startup_count) else: if last_complete < int(time() - 60 * 60 * 48): logger.warn( "Try #{count}, haven't been able to download configurations. " "Will continue trying in background.", count=self.init_startup_count) logger.warn( "Using old configuration information. If this persists, check your configs. " "(Help link soon.)") if self.init_defer.called is False: self.init_defer.callback( 1) # if we don't check for this, we can't stop! else: logger.error( "Unable to reach or contact server. Configurations too old to keep using. " "If problem persists, check your configs. (Help link soon.)" ) self.reconnect = False reactor.stop() return self.init_startup_count = self.init_startup_count + 1 self.check_download_done_calllater = reactor.callLater( 30, self.check_download_done) # def amqp_incoming_response(self, headers, body, properties, **kwargs): """ Process configuration information coming from Yombo Servers. After message is validated, the payload is delivered here. This is an intermediary method and converts the msg into usable chunks and delievered to "add_update_delete". In the future, these may be delievered to the individual libraries for processing. :param msg: raw message from AQMP :param properties: msg properties, so additional information can be retrieved. :return: """ # logger.info("properties: {properties}", properties=properties) # logger.info("headers: {headers}", headers=properties.headers) response_type = headers["response_type"] if response_type not in CONFIG_ITEMS: raise YomboWarning( f"Configuration item 'get_{response_type}' not configured.") self.__process_queue[random_string(length=10)] = { "msg": body, "headers": headers, "properties": properties, } full_response_type = "get_" + response_type if full_response_type not in self.__pending_updates: # Ignore old request items. logger.warn( "Configuration item not requested, dropping: {response_type}", response_type=response_type) return self.__pending_updates[full_response_type]["status"] = "received" self.process_config_queue() @inlineCallbacks def process_config_queue(self): """ We queue incoming responses for processing. This helps to prevent race conditions as well as killing sqlite3 database with a bunch of concurrent requests. This is called to feed the process_config function configuration items to process and is called whenever we receive a configuration response item as well as a looping call to ensure everything gets processed. :return: """ if self.processing: return None if self.processing_queue == False: logger.debug("Starting process_config_queue.") self.processing_queue = True for key in list(self.__process_queue): queue = self.__process_queue[key] # print("processing queue item: %s" % queue) # if queue["headers"]["config_item"] == "get_gateway_cluster": # print("msg: %s" % queue["msg"]) # print("self.__pending_updates: %s" % self.__pending_updates) self.__pending_updates[ f'get_{queue["headers"]["response_type"]}'][ "status"] = "processing" yield self.process_config(queue["msg"], queue["headers"]["response_type"], queue["headers"]["config_type"], True) del self.__process_queue[key] self.processing_queue = False @inlineCallbacks def process_config(self, msg, config_item, config_type=None, primary_config_item=None): """ Process configuration information coming from Yombo Servers. After message is validated, the payload is delivered here. This is an intermediary method and converts the msg into usable chunks and delievered to "add_update_delete". 
    @inlineCallbacks
    def process_config(self, msg, config_item, config_type=None, primary_config_item=None):
        """
        Processes queued configuration messages. The message is converted into usable chunks
        and delivered to :py:meth:`add_update_delete`. In the future, these may be delivered
        to the individual libraries for processing.

        :param msg: raw message from AMQP.
        :param config_item: which configuration item type is being processed.
        :param config_type: "full" when part of a full configuration download.
        :param primary_config_item: True when called for a top-level (non-nested) item.
        :return:
        """
        self.processing = True
        if msg["code"] != 200:
            logger.warn("Configuration error for '{type}' received an error ({code}): {error}",
                        type=config_item, code=msg["code"], error=msg["message"])
            yield self._remove_full_download_queue("get_" + config_item)
            self.processing = False
            return

        if config_item == "gateway_configs":
            payload = msg["data"]
            for section in payload:
                for key in section["Values"]:
                    self.parent._Configs.set(section["Section"], key["Key"], key["Value"])

        elif config_item == "gateway_dns_name":
            payload = msg["data"]
            self.parent._Configs.set("dns", "name", payload["dns_name"])
            self.parent._Configs.set("dns", "domain", payload["dns_domain"])
            self.parent._Configs.set("dns", "domain_id", payload["dns_domain_id"])
            self.parent._Configs.set("dns", "allow_change_at", payload["allow_change_at"])
            self.parent._Configs.set("dns", "fqdn", payload["fqdn"])

        elif config_item in CONFIG_ITEMS:
            config_data = CONFIG_ITEMS[config_item]
            if msg["data_type"] == "object":
                data = self.field_remap(msg["data"], config_data)
                yield self.add_update_delete(msg, data, config_item, config_type, True)
            else:
                processed_data = []
                for data in msg["data"]:
                    processed_data.append(self.field_remap(data, config_data))
                yield self.add_update_delete(msg, processed_data, config_item, config_type, True)
        else:
            logger.warn("ConfigurationUpdate::process_config - '{config_item}' is not a valid configuration item. Skipping.",
                        config_item=config_item)
        yield self._remove_full_download_queue("get_" + config_item)
        self.processing = False

    def field_remap(self, data, config_data):
        new_data = {}
        table_meta = self._LocalDB.db_model[config_data["table"]]
        key = None
        value = None
        try:
            # Re-map AMQP names to local names; unmapped keys pass through unchanged
            # so that nested items (device_types, variable_*) survive for later handling.
            for key, value in data.items():
                if key in config_data["map"]:
                    # Convert ints, floats, and booleans based on the DB column type.
                    if value is None:
                        pass
                    elif table_meta[config_data["map"][key]]["type"] == "INTEGER":
                        value = int(value)
                    elif table_meta[config_data["map"][key]]["type"] == "REAL":
                        value = float(value)
                    elif table_meta[config_data["map"][key]]["type"] == "BOOLEAN":
                        value = is_true_false(value)
                    if key == "energy_map":
                        if isinstance(value, dict):
                            try:
                                value = json.dumps(value)
                            except Exception:
                                value = '{"0.0":0,"1.0":0}'
                    new_data[config_data["map"][key]] = value
                else:
                    new_data[key] = value
            return new_data
        except Exception as e:
            print(f"Error in field_remap. Last key: {key}")
            print(f"input value: {value}")
            print(f"field_remap - config_data = {config_data}")
            print(f"field_remap - data = {data}")
            print(f"table_meta: {table_meta}")
            logger.error("--------==(Error: {e})==--------", e=e)
            logger.error("--------------------------------------------------------")
            logger.error("{error}", error=sys.exc_info())
            logger.error("---------------==(Traceback)==--------------------------")
            logger.error("{trace}", trace=traceback.print_exc(file=sys.stdout))
            logger.error("--------------------------------------------------------")
    # Still an in-progress function; DB calls need cleaning up.
    # todo: what about updates directly to the library? Just call: config_item = get_config_item("devices")
    @inlineCallbacks
    def add_update_delete(self, msg, data_items, config_item, config_type, from_amqp_incoming=False):
        """
        Adds, updates, or deletes various items based on the content of the data items sent in.
        It inspects the data and determines what to do, considering the values already stored
        in the database when deciding between add and update.

        :param data_items:
        :param config_item:
        :param from_amqp_incoming:
        :return:
        """
        config_data = CONFIG_ITEMS[config_item]
        current_table = config_data["table"]
        required_db_keys = []
        table_cols = self._LocalDB.db_model[current_table]

        if current_table not in self.db_existing_data:
            self.db_existing_data[current_table] = {}
            self.db_insert_data[current_table] = []
            self.db_update_data[current_table] = []
            self.db_completed_ids[current_table] = []
            self.db_delete_ids[current_table] = []

            select_fields = ["id"]
            if "updated_at" in table_cols:
                select_fields.append("updated_at")
            if "created_at" in table_cols:
                select_fields.append("created_at")
            records = yield self._LocalDB.select(current_table, ", ".join(select_fields))
            for record in records:
                self.db_existing_data[current_table][record["id"]] = record

            if config_item == "gateway_modules":
                records = yield self._LocalDB.select("module_installed", "id")
                self.db_existing_data["module_installed"] = {}
                for record in records:
                    self.db_existing_data["module_installed"][record["id"]] = True

                records = yield self._LocalDB.select("module_commits", "id")
                self.db_existing_data["module_commits"] = {}
                for record in records:
                    self.db_existing_data["module_commits"][record["id"]] = True
                self.db_insert_data["module_commits"] = []
                self.db_update_data["module_commits"] = []
                self.db_completed_ids["module_commits"] = []

        for col, col_data in table_cols.items():
            if col_data["notnull"] == 1:
                required_db_keys.append(col)

        if isinstance(data_items, list) is False:
            data_items = (data_items, )

        try:
            for data in data_items:
                filtered_data = {}  # Dict of just the keys that are allowed in the DB.
                for key, value in data.items():
                    if key in table_cols:
                        filtered_data[key] = data[key]

                if "status" in filtered_data:
                    if filtered_data["status"] == 2:  # Delete, including any nested items.
                        if config_item == "gateway_modules":
                            self.item_purged(config_item, filtered_data["id"])
                            if filtered_data["id"] in self.db_existing_data["modules"]:
                                self.db_delete_ids["modules"].append(filtered_data["id"])
                                self.db_completed_ids["modules"].append(filtered_data["id"])
                            if filtered_data["id"] in self.db_existing_data["module_installed"]:
                                self.db_delete_ids["module_installed"].append(filtered_data["id"])
                                self.db_completed_ids["module_installed"].append(filtered_data["id"])
                            yield self._LocalDB.delete("module_commits",
                                                       where=["module_id = ?", filtered_data["id"]])
                        elif filtered_data["id"] in self.db_existing_data[current_table]:
                            self.db_delete_ids[current_table].append(filtered_data["id"])
                            self.db_completed_ids[current_table].append(filtered_data["id"])
                        continue

                if config_item == "gateway_modules":
                    for commit in data["module_commits"]:
                        if commit["id"] in self.db_existing_data["module_commits"]:
                            self.db_update_data["module_commits"].append(commit)
                        else:
                            self.db_insert_data["module_commits"].append(commit)
                        self.db_completed_ids["module_commits"].append(filtered_data["id"])

                # Check to make sure the record has the required columns.
                if dict_has_key(filtered_data, required_db_keys) is False:
                    missing = set(required_db_keys) - filtered_data.keys()
                    logger.warn("{config_item}: Cannot do anything. Must have these keys: {needs}  Missing: {missing}",
                                config_item=config_item, needs=required_db_keys, missing=missing)
                    self.db_completed_ids[current_table].append(filtered_data["id"])
                    continue

                library = None
                if config_data["library"] is not None:
                    library = self.parent._Loader.loaded_libraries[config_data["library"]]

                if config_type == "full":
                    records = []
                else:
                    records = yield self._LocalDB.get_dbitem_by_id(config_data["dbclass"],
                                                                   filtered_data["id"])

                # Handle any nested items here.
                if "device_types" in data:
                    if len(data["device_types"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["device_types"]
                        yield self.process_config(newMsg, "module_device_type")

                if "variable_groups" in data:
                    if len(data["variable_groups"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_groups"]
                        yield self.process_config(newMsg, "variable_groups")

                if "variable_fields" in data:
                    if len(data["variable_fields"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_fields"]
                        yield self.process_config(newMsg, "variable_fields")

                if "variable_data" in data:
                    if len(data["variable_data"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_data"]
                        yield self.process_config(newMsg, "variable_data")

                # If a new record...
                if filtered_data["id"] not in self.db_existing_data[current_table]:
                    if "status" in filtered_data and filtered_data["status"] == 2:
                        # We don't add deleted items...
                        if config_data["purgeable"]:
                            self.item_purged(config_item, filtered_data["id"])
                        continue
                    item_to_insert = OrderedDict()
                    for col, col_info in table_cols.items():
                        if col in filtered_data:
                            item_to_insert[col] = filtered_data[col]
                        else:
                            item_to_insert[col] = None

                    self.db_insert_data[current_table].append(item_to_insert)
                    self.db_completed_ids[current_table].append(filtered_data["id"])
                    if "added" in config_data["functions"]:
                        klass = getattr(library, config_data["functions"]["added"])
                        klass(data, True)  # True = the library doesn't need to update the database.

                # ...otherwise, update the existing record.
                else:
                    if "updated_at" in filtered_data:
                        # Only update if the record says it's newer...
                        if filtered_data["updated_at"] == self.db_existing_data[current_table][filtered_data["id"]]["updated_at"]:
                            self.db_completed_ids[current_table].append(data["id"])
                            continue
                    elif "created_at" in filtered_data:
                        if filtered_data["created_at"] == self.db_existing_data[current_table][filtered_data["id"]]["created_at"]:
                            self.db_completed_ids[current_table].append(data["id"])
                            continue

                    item_to_update = OrderedDict()
                    for col, col_info in table_cols.items():
                        if col in filtered_data:
                            item_to_update[col] = filtered_data[col]
                        else:
                            item_to_update[col] = None

                    self.db_update_data[current_table].append(item_to_update)
                    self.db_completed_ids[current_table].append(filtered_data["id"])
                    if "updated_at" in config_data["functions"]:
                        klass = getattr(library, config_data["functions"]["updated_at"])
                        klass(filtered_data, True)

        except Exception as e:
            logger.error("-------==(Error: While saving new config data)==--------")
            logger.error("--------------------------------------------------------")
            logger.error("{error}", error=sys.exc_info())
            logger.error("---------------==(Traceback)==--------------------------")
            logger.error("{trace}", trace=traceback.print_exc(file=sys.stdout))
            logger.error("--------------------------------------------------------")
            logger.warn("Got exception: {e}", e=e)

    def get_config_item(self, library):
        """
        Simple lookup function to get a config_item.

        :param library:
        :return:
        """
        return CONFIG_ITEM_MAP[library]
    def get_system_configs(self):
        """
        On startup, this is called to request all configuration items.

        TODO: After the heavy development cycle is done, a check will be placed here to
        determine whether the gateway can reach the servers or not. If it can't, and we have
        configuration data that isn't too old, we can start without talking to the servers.

        :return:
        """
        self.__doing_full_configs = True
        self._full_download_start_at = time()
        self._getAllConfigsLoggerLoop = LoopingCall(self._show_pending_configs)
        self._getAllConfigsLoggerLoop.start(3, False)  # Log pending downloads; False = don't run now.
        self._checkProcessQueueLoop.start(0.5, False)
        logger.info("Requesting system configurations from server. This can take a few seconds.")

        allCommands = [
            "get_categories",
            "get_gateway_cluster",
            "get_gateway_commands",
            "get_gateway_device_command_inputs",
            "get_gateway_device_types",
            "get_gateway_device_type_commands",
            "get_gateway_devices",  # Includes device variable groups/fields/data
            "get_gateway_dns_name",
            "get_gateway_input_types",
            "get_gateway_locations",
            "get_gateway_modules",  # Includes device variable groups/fields/data
            "get_gateway_nodes",
            "get_gateway_users",
            # "get_gateway_configs",
            # "getGatewayUserTokens",
        ]
        self.initial_config_items_requested = len(allCommands)
        cur_time = int(time())
        for item in allCommands:
            logger.debug("Sending command: {item}", item=item)
            # Only request updates since the last time we asked.
            body = {
                "since": self.parent._Configs.get("amqpyombo_config_times", item, 0),
            }
            self.parent._Configs.set("amqpyombo_config_times", item, cur_time)
            request = self.generate_config_request(
                body=body,
                request_type=item,
            )
            self.parent.publish(**request)
            self._append_full_download_queue(item)

    def item_purged(self, config_item, id):
        body = {"id": id}
        headers = {
            "request_type": "config",
            "config_item": f"purged_{config_item}",
        }
        request_msg = self.parent.generate_message_request(
            exchange_name="ysrv.e.gw_config",
            source="yombo.gateway.lib.amqpyobo",
            destination="yombo.server.configs",
            headers=headers,
            body=body,
        )
        request_msg["correlation_persistent"] = False
        self.parent.publish(**request_msg)

    def generate_config_request(self, headers=None, body=None, request_type=None):
        """
        Generates a request specific to this library - configs!

        :param headers:
        :param body:
        :param request_type:
        :return:
        """
        request_msg = self.parent.generate_message_request(
            exchange_name="ysrv.e.gw_config",
            source="yombo.gateway.lib.amqpyobo",
            destination="yombo.server.configs",
            request_type=request_type,
            headers=headers,
            body=body,
        )
        # logger.debug("response: {request_msg}", request_msg=request_msg)
        request_msg["correlation_persistent"] = False
        return request_msg

    def _append_full_download_queue(self, table):
        """
        Adds an item to the pending queue. Items are removed as each config item is
        returned, by :py:meth:`_remove_full_download_queue`.
        """
        if table not in self.__pending_updates:
            logger.debug("Adding table to request queue: {table}", table=table)
            self.__pending_updates[table] = {
                "status": "requested",
                "request_at": time(),
            }

    @inlineCallbacks
    def _remove_full_download_queue(self, table):
        if table in self.__pending_updates:
            del self.__pending_updates[table]
        else:
            return
        self.init_startup_count = 0
        if len(self.__pending_updates) == 0 and self.__doing_full_configs is True:
            self.__doing_full_configs = False
            self._getAllConfigsLoggerLoop.stop()
            self._checkProcessQueueLoop.stop()

            # Now do all the actual database deletes, updates, and inserts...
            for table, records in self.db_delete_ids.items():
                # Anything we marked completed was sent to us; drop it from the
                # "existing" set so that whatever remains is stale and can be deleted.
                for completed_id in self.db_completed_ids[table]:
                    if completed_id in self.db_existing_data[table]:
                        del self.db_existing_data[table][completed_id]
                for existing_id in list(self.db_existing_data[table].keys()):
                    records.append(existing_id)
                if len(records) > 0:
                    yield self._LocalDB.delete_many(table, records)
            self.db_existing_data = {}

            for table, records in self.db_update_data.items():
                if len(records) > 0:
                    yield self._LocalDB.update_many(table, records, "id")
            self.db_update_data = {}

            for table, records in self.db_insert_data.items():
                if len(records) > 0:
                    yield self._LocalDB.insert_many(table, records)
            self.db_insert_data = {}

            reactor.callLater(0.1, self.done_with_startup_downloads)  # Give the DB some breathing room.
        else:
            if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running:
                self._getAllConfigsLoggerLoop.reset()
            if self.check_download_done_calllater is not None and self.check_download_done_calllater.active():
                self.check_download_done_calllater.reset(30)
            else:
                self.check_download_done_calllater = reactor.callLater(30, self.check_download_done)

    def done_with_startup_downloads(self):
        """
        Called when configuration downloads are complete.

        :return:
        """
        self.parent._Configs.set("amqpyombo", "lastcomplete", int(time()))
        if self.init_defer.called is False:
            self.init_defer.callback(1)  # If we don't check for this, we can't stop!
        # self.parent._Notifications.add(
        #     {'title': 'Configs Downloaded', 'message': 'Downloaded system configurations from Yombo server.',
        #      'timeout': 300, 'source': 'Yombo Gateway System'})

    def _show_pending_configs(self):
        waitingTime = time() - self._full_download_start_at
        logger.debug("Waited {waitingTime} for startup; pending these configurations:",
                     waitingTime=waitingTime)
        count_pending = 0
        currently_processing = ""
        display_waiting_on = []
        for key in list(self.__pending_updates):
            count_pending += 1
            logger.debug("Config: {config}, Status: {status}",
                         config=key, status=self.__pending_updates[key]["status"])
            if self.__pending_updates[key]["status"] == "processing":
                currently_processing = key
                self.check_download_done_calllater.reset(30)
            display_waiting_on.append(key)

        if currently_processing == "":
            logger.info("Waiting for the following items from server: {the_list}",
                        the_list=", ".join(display_waiting_on))
        else:
            logger.info("Processing configuration, {count_done} of {number_requested}. Currently processing: {currently_processing}",
                        currently_processing=currently_processing,
                        count_done=self.initial_config_items_requested - count_pending,
                        number_requested=self.initial_config_items_requested)

    def _local_log(self, level, location, msg=""):
        logit = getattr(logger, level)
        logit("In {location} : {msg}", location=location, msg=msg)
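
# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the field-remap/type-coercion pattern
# used by AmqpConfigHandler.field_remap() above. The table description shape
# (column name -> {"type": ...}) mirrors what self._LocalDB.db_model provides;
# the names EXAMPLE_TABLE_META, EXAMPLE_MAP, and remap_fields_example are
# illustrative only and not part of the library.
import json

EXAMPLE_TABLE_META = {
    "id": {"type": "TEXT"},
    "enabled": {"type": "BOOLEAN"},
    "weight": {"type": "INTEGER"},
    "energy_map": {"type": "TEXT"},
}
EXAMPLE_MAP = {"ID": "id", "Enabled": "enabled", "Weight": "weight", "EnergyMap": "energy_map"}


def remap_fields_example(data):
    """Rename server-side keys to local column names and coerce value types.

    Unmapped keys pass through unchanged, just as in field_remap() above, so
    nested structures remain available for later processing.
    """
    out = {}
    for key, value in data.items():
        if key in EXAMPLE_MAP:
            col = EXAMPLE_MAP[key]
            col_type = EXAMPLE_TABLE_META[col]["type"]
            if value is not None:
                if col_type == "INTEGER":
                    value = int(value)
                elif col_type == "REAL":
                    value = float(value)
                elif col_type == "BOOLEAN":
                    # Stand-in for the library's is_true_false() helper.
                    value = str(value).lower() in ("1", "true", "yes")
            if col == "energy_map" and isinstance(value, dict):
                value = json.dumps(value)  # nested structures are stored as JSON text
            out[col] = value
        else:
            out[key] = value  # pass through for nested-item handling
    return out

# remap_fields_example({"ID": "abc", "Enabled": "1", "Weight": "3", "extra": []})
# -> {"id": "abc", "enabled": True, "weight": 3, "extra": []}
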
class C2SServerProtocol(InternalServerProtocol):
    pinger = None
    ping_txid = None

    def __init__(self, config):
        txprotobuf.Protocol.__init__(self, c2s)
        self.MAX_LENGTH = config["server"]["c2s.pack_size_max"]
        self.idler = LoopingCall(self.onIdle)
        if "reset" not in dir(self.idler):
            # Older Twisted versions lack LoopingCall.reset();
            # emulate it by stopping and restarting the loop.
            def reset():
                try:
                    self.idler.stop()
                except Exception:
                    pass
                try:
                    self.idler.start(IDLE_DELAY, False)
                except Exception:
                    pass
            self.idler.reset = reset

    def connectionMade(self):
        InternalServerProtocol.connectionMade(self)
        """
        TODO this breaks compatibility with Android client < 16
        info = InternalServerProtocol.connectionMade(self)
        # send serverinfo
        r = c2s.ServerInfo()
        r.version = info['version']
        r.client_protocol = info['client-protocol']
        r.fingerprint = info['fingerprint']
        r.network = info['network']
        r.supports.extend(info['supports'])
        self.sendBox(r)
        """
        # start idler
        self.idler.start(IDLE_DELAY, False)

    def connectionLost(self, reason=connectionDone):
        try:
            # stop idler
            self.idler.stop()
        except Exception:
            pass
        try:
            # stop pinger (a DelayedCall, so cancel rather than stop)
            self.pinger.cancel()
        except Exception:
            pass
        InternalServerProtocol.connectionLost(self, reason)

    def onIdle(self):
        self.service.idle()
        # send ping
        a = c2s.Ping()
        a.timestamp = int(time.time())
        self.ping_txid = self.sendBox(a)
        # ping is sent, wait for response
        if not self.pinger:
            self.pinger = reactor.callLater(PING_DELAY, self.pingTimeout)

    def pingTimeout(self):
        try:
            self.idler.stop()
        except Exception:
            pass
        self.service.ping_timeout()
        self.pinger = None

    def boxReceived(self, data, tx_id=None):
        # reset idler
        try:
            self.idler.reset()
        except Exception:
            pass

        # optional reply
        r = None
        name = data.__class__.__name__
        if name == "LoginRequest":
            r = c2s.LoginResponse()
            (r.status, userid) = self.service.login(str(tx_id), data.token,
                                                    data.client_protocol, data.client_version, data.flags)
            if userid:
                r.user_id = userid

        elif name == "AuthenticateRequest":
            r = c2s.AuthenticateResponse()
            r.valid = self.service.authenticate(str(tx_id), data.token)

        elif name == "MessagePostRequest":
            if self.service.is_logged():
                r = c2s.MessagePostResponse()
                if len(data.recipient) > 0:
                    res = self.service.post_message(str(tx_id), tuple(data.recipient),
                                                    str(data.mime), tuple(data.flags), data.content)
                    for userid, msgid in res.items():
                        me = r.entry.add()
                        me.user_id = userid
                        if isinstance(msgid, str):
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_SUCCESS
                            me.message_id = msgid
                        elif msgid:
                            me.status = msgid
                        else:
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_ERROR

        elif name == "MessageAckRequest":
            if self.service.is_logged():
                r = c2s.MessageAckResponse()
                res = self.service.ack_message(str(tx_id), tuple(data.message_id))
                for _msgid in data.message_id:
                    msgid = str(_msgid)
                    e = r.entry.add()
                    e.message_id = msgid
                    try:
                        if res[msgid]:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_SUCCESS
                        else:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_NOTFOUND
                    except Exception:
                        import traceback
                        traceback.print_exc()
                        e.status = c2s.MessageAckResponse.Entry.STATUS_ERROR

        elif name == "RegistrationRequest":
            if not self.service.is_logged():
                r = c2s.RegistrationResponse()
                res = self.service.register_user(str(tx_id), str(data.username))
                r.status = res["status"]
                if "token" in res:
                    r.token = res["token"]
                if "sms_from" in res:
                    r.sms_from = res["sms_from"]
                if "email_from" in res:
                    r.email_from = res["email_from"]

        elif name == "ValidationRequest":
            if not self.service.is_logged():
                r = c2s.ValidationResponse()
                res, token = self.service.validate_user(str(tx_id), str(data.validation_code))
                r.status = res
                if token:
                    r.token = token

        elif name == "UserLookupRequest":
            if self.service.is_logged():
                # lookup can delay operations by a long time, so defer it
                def lookup_complete(found, tx_id, return_value=False):
                    r = c2s.UserLookupResponse()
                    for u in found:
                        e = r.entry.add()
                        e.user_id = u["userid"]
                        if "status" in u:
                            e.status = u["status"]
                        if "timestamp" in u:
                            e.timestamp = u["timestamp"]
                        if "timediff" in u:
                            e.timediff = u["timediff"]
                    if return_value:
                        return r
                    else:
                        self.sendBox(r, tx_id)

                # force generic users only
                found = self.service.lookup_users(tx_id, [str(x)[:utils.USERID_LENGTH] for x in data.user_id])
                if isinstance(found, defer.Deferred):
                    found.addCallback(lookup_complete, tx_id)
                else:
                    r = lookup_complete(found, tx_id, True)

        elif name == "ServerInfoRequest":
            r = c2s.ServerInfoResponse()
            info = self.service.serverinfo(str(tx_id), str(data.client_version), data.client_protocol)
            r.version = info["version"]
            r.client_protocol = info["client-protocol"]
            r.server_protocol = info["server-protocol"]
            r.fingerprint = info["fingerprint"]
            r.supports.extend(info["supports"])

        elif name == "UserInfoUpdateRequest":
            if self.service.is_logged():
                r = c2s.UserInfoUpdateResponse()
                flags = None
                if data.HasField("flags"):
                    flags = data.flags
                if data.HasField("status_message"):
                    status_msg = data.status_message.encode("utf-8")
                else:
                    status_msg = None
                if data.HasField("google_registration_id"):
                    google_regid = str(data.google_registration_id)
                else:
                    google_regid = None
                r.status = self.service.user_update(flags, status_msg, google_regid)

        elif name == "UserPresenceSubscribeRequest":
            if self.service.is_logged():
                r = c2s.UserPresenceSubscribeResponse()
                r.status = self.service.user_presence_subscribe(str(data.user_id), data.events)

        elif name == "Pong":
            # client has responded: check tx_id
            if self.ping_txid == tx_id:
                # reset ping timeout check
                try:
                    self.pinger.cancel()
                except Exception:
                    pass
                self.pinger = None

        elif name == "ValidationCodeRequest":
            if self.service.is_logged():
                code = self.service.revalidate()
                r = c2s.ValidationCodeResponse()
                if isinstance(code, str):
                    r.status = c2s.ValidationCodeResponse.STATUS_SUCCESS
                    r.code = code
                else:
                    r.status = c2s.ValidationCodeResponse.STATUS_ERROR

        if r:
            self.sendBox(r, tx_id)

        # reset idler (again)
        try:
            self.idler.reset()
        except Exception:
            pass
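
# ---------------------------------------------------------------------------
# A minimal sketch of the idle/ping choreography used by C2SServerProtocol
# above: a LoopingCall fires after a quiet period and sends a ping, then a
# reactor.callLater acts as the pong deadline. The protocol, service, and
# wire format are stubbed out; the class name and the IDLE/PING delay values
# are illustrative, not part of the server.
from twisted.internet import reactor as _reactor
from twisted.internet.task import LoopingCall as _LoopingCall


class _PingPongSketch(object):
    IDLE = 60.0   # seconds of silence before we ping (assumed value)
    PING = 30.0   # seconds to wait for a pong (assumed value)

    def __init__(self):
        self.idler = _LoopingCall(self.on_idle)
        self.pinger = None

    def start(self):
        self.idler.start(self.IDLE, now=False)

    def box_received(self):
        # Every inbound box pushes the idle timer back, exactly like
        # self.idler.reset() in boxReceived() above.
        self.idler.reset()

    def on_idle(self):
        # Connection has been quiet: send a ping and start the pong deadline.
        if self.pinger is None:
            self.pinger = _reactor.callLater(self.PING, self.on_ping_timeout)

    def pong_received(self):
        # Pong arrived in time: cancel the deadline.
        if self.pinger is not None and self.pinger.active():
            self.pinger.cancel()
        self.pinger = None

    def on_ping_timeout(self):
        # No pong arrived: the real protocol calls self.service.ping_timeout()
        # here to drop the connection.
        self.pinger = None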