Example #1
class Consumer(object):
    """A simple Kafka consumer implementation

    This consumer consumes a single partition from a single topic, optionally
    automatically committing offsets.  Use it as follows:

      * Create an instance of :class:`afkak.KafkaClient` with cluster
        connectivity details.
      * Create the :class:`Consumer`, supplying the client, topic, partition,
        processor function, and optionally fetch specifics, a consumer group,
        and a commit policy.
      * Call :meth:`.start` with the offset within the partition at which to
        start consuming messages. See :meth:`.start` for details.
      * Process the messages in your :attr:`.processor` callback, returning a
        :class:`~twisted.internet.defer.Deferred` to provide backpressure as
        needed.
      * Once processing resolves, :attr:`.processor` will be called again with
        the next batch of messages.
      * When desired, call :meth:`.stop` on the :class:`Consumer` to halt
        calls to the :attr:`processor` function and cancel any outstanding
        requests to the Kafka cluster.

    A :class:`Consumer` may be restarted once stopped.

    :ivar client:
        Connected :class:`KafkaClient` for submitting requests to the Kafka
        cluster.
    :ivar bytes topic:
        The topic from which to consume messages.
    :ivar int partition:
        The partition from which to consume.
    :ivar callable processor:
        The callback function to which the consumer and lists of messages
        (:class:`afkak.common.SourcedMessage`) will be submitted
        for processing.  The function may return
        a :class:`~twisted.internet.defer.Deferred` and will not be called
        again until this Deferred resolves.
    :ivar bytes consumer_group:
        Optional consumer group ID for committing offsets of processed
        messages back to Kafka.
    :ivar bytes commit_metadata:
        Optional metadata to store with the offset commit.
    :ivar int auto_commit_every_n:
        Number of messages after which the consumer will automatically
        commit the offset of the last processed message to Kafka. Zero
        disables auto-commit by message count. Defaults to
        :data:`AUTO_COMMIT_MSG_COUNT`.
    :ivar int auto_commit_every_ms:
        Time interval in milliseconds after which the consumer will
        automatically commit the offset of the last processed message to
        Kafka. Zero disables time-based auto-commit. Defaults to
        :data:`AUTO_COMMIT_INTERVAL`.
    :ivar int fetch_size_bytes:
        Number of bytes to request in a :class:`FetchRequest`.  Kafka will
        defer fulfilling the request until at least this many bytes can be
        returned.
    :ivar int fetch_max_wait_time:
        Max number of milliseconds the server should wait for that many
        bytes.
    :ivar int buffer_size:
        Initial number of bytes to tell Kafka we have available (default
        128K). When a fetched message is too large to fit, this is raised 16x
        at a time up to 1MB, then doubled, until it reaches
        :attr:`max_buffer_size` (if set).
    :ivar int max_buffer_size:
        Max number of bytes to tell Kafka we have available.  `None` means
        no limit (the default). Must be larger than the largest message we
        will find in our topic/partitions.
    :ivar float request_retry_init_delay:
        Number of seconds to wait before retrying a failed request to
        Kafka.
    :ivar float request_retry_max_delay:
        Maximum number of seconds to wait before retrying a failed request
        to Kafka (the delay is increased on each failure and reset to the
        initial delay upon success).
    :ivar int request_retry_max_attempts:
        Maximum number of attempts to make for any request. Default of zero
        means retry forever; other values must be positive and indicate
        the number of attempts to make before returning failure.

    """
    def __init__(self, client, topic, partition, processor,
                 consumer_group=None,
                 commit_metadata=None,
                 auto_commit_every_n=None,
                 auto_commit_every_ms=None,
                 fetch_size_bytes=FETCH_MIN_BYTES,
                 fetch_max_wait_time=FETCH_MAX_WAIT_TIME,
                 buffer_size=FETCH_BUFFER_SIZE_BYTES,
                 max_buffer_size=None,
                 request_retry_init_delay=REQUEST_RETRY_MIN_DELAY,
                 request_retry_max_delay=REQUEST_RETRY_MAX_DELAY,
                 request_retry_max_attempts=0):
        # Store away parameters
        self.client = client  # KafkaClient
        self.topic = topic = _coerce_topic(topic)
        self.partition = partition  # The partition within the topic we consume
        self.processor = processor  # The callback we call with the msg list
        # Commit related parameters (Ensure the attr. exist, even if None)
        if consumer_group is not None:
            consumer_group = _coerce_consumer_group(consumer_group)
        self.consumer_group = consumer_group
        self.commit_metadata = commit_metadata
        if commit_metadata is not None and not isinstance(commit_metadata, bytes):
            raise TypeError('commit_metadata={!r} should be bytes'.format(
                commit_metadata))
        self.auto_commit_every_n = None
        self.auto_commit_every_s = None
        if consumer_group:
            # Auto committing is possible...
            if auto_commit_every_n is None:
                auto_commit_every_n = AUTO_COMMIT_MSG_COUNT
            if auto_commit_every_ms is None:
                auto_commit_every_ms = AUTO_COMMIT_INTERVAL
            if not isinstance(auto_commit_every_n, Integral):
                raise ValueError('auto_commit_every_n parameter must be '
                                 'subtype of Integral')
            if not isinstance(auto_commit_every_ms, Integral):
                raise ValueError('auto_commit_every_ms parameter must be '
                                 'subtype of Integral')
            if auto_commit_every_ms < 0 or auto_commit_every_n < 0:
                raise ValueError('auto_commit_every_ms and auto_commit_every_n'
                                 ' must be non-negative')
            self.auto_commit_every_n = auto_commit_every_n
            self.auto_commit_every_s = float(auto_commit_every_ms) / 1000
        else:
            if auto_commit_every_ms or auto_commit_every_n:
                raise ValueError('An auto_commit_every_x argument set without '
                                 'specifying consumer_group')
        # Fetch related instance variables
        self.fetch_min_bytes = fetch_size_bytes
        self.fetch_max_wait_time = int(fetch_max_wait_time)
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        # request retry timing
        self.retry_delay = float(request_retry_init_delay)  # fetch only
        self.retry_init_delay = float(request_retry_init_delay)
        self.retry_max_delay = float(request_retry_max_delay)
        self.request_retry_max_attempts = int(request_retry_max_attempts)
        if (not isinstance(request_retry_max_attempts, Integral) or
                request_retry_max_attempts < 0):
            raise ValueError(
                'request_retry_max_attempts must be non-negative integer')
        self._fetch_attempt_count = 1

        # # Internal state tracking attributes
        self._fetch_offset = None  # We don't know at what offset to fetch yet
        self._last_processed_offset = None  # Last msg processed offset
        self._last_committed_offset = None  # The last offset stored in Kafka
        self._stopping = False  # We're not stopping yet...
        self._shuttingdown = False  # We're not shutting down either
        self._shutdown_d = None  # deferred for tracking shutdown request
        self._commit_looper = None  # Looping call for auto-commit
        self._commit_looper_d = None  # Deferred for running looping call
        self._commit_ds = []  # Deferreds to notify when commit completes
        self._commit_req = None  # Track outstanding commit request
        # For tracking various async operations
        self._start_d = None  # deferred for alerting user of errors
        self._request_d = None  # outstanding KafkaClient request deferred
        self._retry_call = None  # IDelayedCall object for delayed retries
        self._commit_call = None  # IDelayedCall for delayed commit retries
        self._msg_block_d = None  # deferred for each block of messages
        self._processor_d = None  # deferred for a result from processor
        self._state = '[initialized]'  # Keep track of state for debugging
        # Check parameters for sanity
        if max_buffer_size is not None and buffer_size > max_buffer_size:
            raise ValueError("buffer_size (%d) is greater than "
                             "max_buffer_size (%d)" %
                             (buffer_size, max_buffer_size))
        if not isinstance(self.partition, Integral):
            raise ValueError('partition parameter must be subtype of Integral')

    def __repr__(self):
        return '<{} {} topic={}, partition={}, processor={}>'.format(
            self.__class__.__name__, self._state,
            self.topic, self.partition, self.processor,
        )

    def start(self, start_offset):
        """
        Starts fetching messages from Kafka and delivering them to the
        :attr:`.processor` function.

        :param int start_offset:
            The offset within the partition from which to start fetching.
            Special values include: :const:`OFFSET_EARLIEST`,
            :const:`OFFSET_LATEST`, and :const:`OFFSET_COMMITTED`. If the
            supplied offset is :const:`OFFSET_EARLIEST` or
            :const:`OFFSET_LATEST` the :class:`Consumer` will use the
            OffsetRequest Kafka API to retrieve the actual offset used for
            fetching. In the case :const:`OFFSET_COMMITTED` is used,
            a `consumer_group` MUST be set on the Consumer, and the Consumer
            will use the OffsetFetchRequest Kafka API to retrieve the actual
            offset used for fetching.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which will resolve
            successfully when the consumer is cleanly stopped, or with
            a failure if the :class:`Consumer` encounters an error from which
            it is unable to recover.

        :raises: :exc:`RestartError` if already running.
        """
        # Have we been started already, and not stopped?
        if self._start_d is not None:
            raise RestartError("Start called on already-started consumer")

        # Keep track of state for debugging
        self._state = '[started]'

        # Create and return a deferred for alerting on errors/stoppage
        start_d = self._start_d = Deferred()

        # Start a new fetch request, possibly just for the starting offset
        self._fetch_offset = start_offset
        self._do_fetch()

        # Set up the auto-commit timer, if needed
        if self.consumer_group and self.auto_commit_every_s:
            self._commit_looper = LoopingCall(self._auto_commit)
            self._commit_looper.clock = self.client.reactor
            self._commit_looper_d = self._commit_looper.start(
                self.auto_commit_every_s, now=False)
            self._commit_looper_d.addCallbacks(self._commit_timer_stopped,
                                               self._commit_timer_failed)
        return start_d

    def shutdown(self):
        """Gracefully shutdown the consumer

        Consumer will complete any outstanding processing, commit its current
        offsets (if so configured) and stop.

        Returns a deferred which fires with a tuple of
        (last processed offset, last committed offset) if it was able to
        commit successfully, errbacks with the commit failure, if any, or
        fails with RestopError if the consumer is not running.
        """
        def _handle_shutdown_commit_success(result):
            """Handle the result of the commit attempted by shutdown"""
            self._shutdown_d, d = None, self._shutdown_d
            self.stop()
            self._shuttingdown = False  # Shutdown complete
            d.callback((self._last_processed_offset,
                       self._last_committed_offset))

        def _handle_shutdown_commit_failure(failure):
            """Handle failure of commit() attempted by shutdown"""
            if failure.check(OperationInProgress):
                failure.value.deferred.addCallback(_commit_and_stop)
                return

            self._shutdown_d, d = None, self._shutdown_d
            self.stop()
            self._shuttingdown = False  # Shutdown complete
            d.errback(failure)

        def _commit_and_stop(result):
            """Commit the current offsets (if needed) and stop the consumer"""
            if not self.consumer_group:  # No consumer group, no committing
                return _handle_shutdown_commit_success(None)

            # Need to commit prior to stopping
            self.commit().addCallbacks(_handle_shutdown_commit_success,
                                       _handle_shutdown_commit_failure)

        # If we're not running, return a failure
        if self._start_d is None:
            return fail(Failure(
                RestopError("Shutdown called on non-running consumer")))
        # If we're called multiple times, return a failure
        if self._shutdown_d:
            return fail(Failure(
                RestopError("Shutdown called more than once.")))
        # Set our _shuttingdown flag, so our _process_message routine will stop
        # feeding new messages to the processor, and fetches won't be retried
        self._shuttingdown = True
        # Keep track of state for debugging
        self._state = '[shutting down]'

        # Create a deferred to track the shutdown
        self._shutdown_d = d = Deferred()

        # Are we waiting for the processor to complete? If so, when it's done,
        # commit our offsets and stop.
        if self._processor_d:
            self._processor_d.addCallback(_commit_and_stop)
        else:
            # No need to wait for the processor, we can commit and stop now
            _commit_and_stop(None)

        # return the deferred
        return d

    def stop(self):
        """
        Stop the consumer and return the offset of the last processed
        message.  This cancels all outstanding operations.  Also, if the
        deferred returned by `start` hasn't yet fired, it is fired with
        a tuple of the last processed offset and the last committed offset.

        :raises: :exc:`RestopError` if the :class:`Consumer` is not running.
        """
        if self._start_d is None:
            raise RestopError("Stop called on non-running consumer")

        self._stopping = True
        # Keep track of state for debugging
        self._state = '[stopping]'
        # Are we waiting for a request to come back?
        if self._request_d:
            self._request_d.cancel()
        # Are we working our way through a block of messages?
        if self._msg_block_d:
            # Need to add a cancel handler...
            _msg_block_d, self._msg_block_d = self._msg_block_d, None
            _msg_block_d.addErrback(lambda fail: fail.trap(CancelledError))
            _msg_block_d.cancel()
        # Are we waiting for the processor to complete?
        if self._processor_d:
            self._processor_d.cancel()
        # Are we waiting to retry a request?
        if self._retry_call:
            self._retry_call.cancel()
        # Are we waiting on a commit request?
        if self._commit_ds:
            while self._commit_ds:
                d = self._commit_ds.pop()
                d.cancel()
        if self._commit_req:
            self._commit_req.cancel()
        # Are we waiting to retry a commit?
        if self._commit_call:
            self._commit_call.cancel()
        # Do we have an auto-commit looping call?
        if self._commit_looper is not None:
            self._commit_looper.stop()
        # Done stopping
        self._stopping = False
        # Keep track of state for debugging
        self._state = '[stopped]'

        # Clear and possibly callback our start() Deferred
        self._start_d, d = None, self._start_d
        if not d.called:
            d.callback((self._last_processed_offset,
                        self._last_committed_offset))

        # Return the offset of the message we last processed
        return self._last_processed_offset

    def commit(self):
        """
        Commit the offset of the message we last processed if it is different
        from what we believe is the last offset committed to Kafka.

        .. note::

            It is possible to commit a smaller offset than Kafka has stored.
            This is by design, so we can reprocess a Kafka message stream if
            desired.

        On error, will retry according to :attr:`request_retry_max_attempts`
        (by default, forever).

        If called while a commit operation is in progress, and new messages
        have been processed since the last request was sent then the commit
        will fail with :exc:`OperationInProgress`.  The
        :exc:`OperationInProgress` exception wraps
        a :class:`~twisted.internet.defer.Deferred` which fires when the
        outstanding commit operation completes.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which resolves with the
            committed offset when the operation has completed.  It will resolve
            immediately if the current offset and the last committed offset do
            not differ.
        """
        # Can't commit without a consumer_group
        if not self.consumer_group:
            return fail(Failure(InvalidConsumerGroupError(
                "Bad Group_id:{0!r}".format(self.consumer_group))))
        # short circuit if we are 'up to date', or haven't processed anything
        if ((self._last_processed_offset is None) or
                (self._last_processed_offset == self._last_committed_offset)):
            return succeed(self._last_committed_offset)

        # If we're currently processing a commit we return a failure
        # with a deferred we'll fire when the in-progress one completes
        if self._commit_ds:
            d = Deferred()
            self._commit_ds.append(d)
            return fail(OperationInProgress(d))

        # Ok, we have processed messages since our last commit attempt, and
        # we're not currently waiting on a commit request to complete:
        # Start a new one
        d = Deferred()
        self._commit_ds.append(d)

        # Send the request
        self._send_commit_request()

        # Reset the commit_looper here, rather than on success to give
        # more stability to the commit interval.
        if self._commit_looper is not None:
            self._commit_looper.reset()

        # return the deferred
        return d

    # # Private Methods # #

    def _retry_auto_commit(self, result, by_count=False):
        self._auto_commit(by_count)
        return result

    def _auto_commit(self, by_count=False):
        """Check if we should start a new commit operation and commit"""
        # Check if we are even supposed to do any auto-committing
        if (self._stopping or self._shuttingdown or (not self._start_d) or
                (self._last_processed_offset is None) or
                (not self.consumer_group) or
                (by_count and not self.auto_commit_every_n)):
            return

        # If we're auto_committing because the timer expired, or by count and
        # we don't have a record of our last_committed_offset, or we've
        # processed enough messages since our last commit, then try to commit
        if (not by_count or self._last_committed_offset is None or
            (self._last_processed_offset - self._last_committed_offset
             ) >= self.auto_commit_every_n):
            if not self._commit_ds:
                commit_d = self.commit()
                commit_d.addErrback(self._handle_auto_commit_error)
            else:
                # We're waiting on the last commit to complete, so add a
                # callback to be called when the current request completes
                d = Deferred()
                d.addCallback(self._retry_auto_commit, by_count)
                self._commit_ds.append(d)

    def _retry_fetch(self, after=None):
        """
        Schedule a delayed :meth:`_do_fetch` call after a failure

        :param float after:
            The delay in seconds after which to do the retried fetch. If
            `None`, our internal :attr:`retry_delay` is used, and adjusted by
            :const:`REQUEST_RETRY_FACTOR`.
        """

        # Have we been told to stop or shutdown?  Then don't actually retry.
        if self._stopping or self._shuttingdown or self._start_d is None:
            # Stopping, or stopped already? No more fetching.
            return
        if self._retry_call is None:
            if after is None:
                after = self.retry_delay
                self.retry_delay = min(self.retry_delay * REQUEST_RETRY_FACTOR,
                                       self.retry_max_delay)

            self._fetch_attempt_count += 1
            self._retry_call = self.client.reactor.callLater(
                after, self._do_fetch)

    def _handle_offset_response(self, response):
        """
        Handle responses to both OffsetRequest and OffsetFetchRequest, since
        they are similar enough.

        :param response:
            A tuple of a single OffsetFetchResponse or OffsetResponse
        """
        # Got a response, clear our outstanding request deferred
        self._request_d = None

        # Successful request, reset our retry delay, count, etc
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        response = response[0]
        if hasattr(response, 'offsets'):
            # It's a response to an OffsetRequest
            self._fetch_offset = response.offsets[0]
        else:
            # It's a response to an OffsetFetchRequest
            # Make sure we got a valid offset back. Kafka uses -1 to indicate
            # no committed offset was retrieved
            if response.offset == OFFSET_NOT_COMMITTED:
                self._fetch_offset = OFFSET_EARLIEST
            else:
                self._fetch_offset = response.offset + 1
                self._last_committed_offset = response.offset
        self._do_fetch()

    def _handle_offset_error(self, failure):
        """
        Retry the offset fetch request if appropriate.

        Once the :attr:`.retry_delay` reaches our :attr:`.retry_max_delay`, we
        log a warning.  This should perhaps be extended to abort sooner on
        certain errors.
        """
        # outstanding request got errback'd, clear it
        self._request_d = None

        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching offset from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return
        # Decide how to log this failure... If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other time
        # debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching offset from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching offset from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _clear_processor_deferred(self, result):
        self._processor_d = None  # It has fired, we can clear it
        return result

    def _update_processed_offset(self, result, offset):
        self._last_processed_offset = offset
        self._auto_commit(by_count=True)

    def _clear_commit_req(self, result):
        self._commit_req = None  # It has fired, we can clear it
        return result

    def _update_committed_offset(self, result, offset):
        # successful commit request completed
        self._last_committed_offset = offset
        self._deliver_commit_result(offset)
        return offset

    def _deliver_commit_result(self, result):
        # Let anyone waiting know the commit completed. Handle the case where
        # they try to commit from the callback by preserving self._commit_ds
        # as a local, but clearing the attribute itself.
        commit_ds, self._commit_ds = self._commit_ds, []
        while commit_ds:
            d = commit_ds.pop()
            d.callback(result)

    def _send_commit_request(self, retry_delay=None, attempt=None):
        """Send a commit request with our last_processed_offset"""
        # If there's a _commit_call, and it's not active, clear it, it probably
        # just called us...
        if self._commit_call and not self._commit_call.active():
            self._commit_call = None

        # Make sure we only have one outstanding commit request at a time
        if self._commit_req is not None:
            raise OperationInProgress(self._commit_req)

        # Handle defaults
        if retry_delay is None:
            retry_delay = self.retry_init_delay
        if attempt is None:
            attempt = 1

        # Create new OffsetCommitRequest with the latest processed offset
        commit_offset = self._last_processed_offset
        commit_request = OffsetCommitRequest(
            self.topic, self.partition, commit_offset,
            TIMESTAMP_INVALID, self.commit_metadata)
        log.debug("Committing off=%d grp=%s tpc=%s part=%s req=%r",
                  self._last_processed_offset, self.consumer_group,
                  self.topic, self.partition, commit_request)

        # Send the request, add our callbacks
        self._commit_req = d = self.client.send_offset_commit_request(
            self.consumer_group, [commit_request])

        d.addBoth(self._clear_commit_req)
        d.addCallbacks(
            self._update_committed_offset, self._handle_commit_error,
            callbackArgs=(commit_offset,),
            errbackArgs=(retry_delay, attempt))

    def _handle_commit_error(self, failure, retry_delay, attempt):
        """ Retry the commit request, depending on failure type

        Depending on the type of the failure, we retry the commit request
        with the latest processed offset, or callback/errback self._commit_ds
        """
        # Check if we are stopping and the request was cancelled
        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return self._deliver_commit_result(self._last_committed_offset)

        # Check that the failure type is a Kafka error...this could maybe be
        # a tighter check to determine whether a retry will succeed...
        if not failure.check(KafkaError):
            log.error("Unhandleable failure during commit attempt: %r\n\t%r",
                      failure, failure.getBriefTraceback())
            return self._deliver_commit_result(failure)

        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                attempt >= self.request_retry_max_attempts):
            log.debug("%r: Exhausted attempts: %d to commit offset: %r",
                      self, self.request_retry_max_attempts, failure)
            return self._deliver_commit_result(failure)

        # Check the retry_delay to see if we should log at the higher level
        # Using attempts % 2 gets us ~1 warning/minute with default timings
        if (retry_delay < self.retry_max_delay or 0 == (attempt % 2)):
            log.debug("%r: Failure committing offset to kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log alternately at warn
            log.warning("%r: Still failing committing offset to kafka: %r",
                        self, failure)

        # Schedule a delayed call to retry the commit
        retry_delay = min(retry_delay * REQUEST_RETRY_FACTOR,
                          self.retry_max_delay)
        self._commit_call = self.client.reactor.callLater(
            retry_delay, self._send_commit_request, retry_delay, attempt + 1)

    def _handle_auto_commit_error(self, failure):
        if self._start_d is not None and not self._start_d.called:
            self._start_d.errback(failure)

    def _handle_processor_error(self, failure):
        """Handle a failure in the processing of a block of messages

        This method is called when the processor func fails while processing
        a block of messages. Since we can't know how best to handle a
        processor failure, we just :func:`errback` our :func:`start` method's
        deferred to let our user know about the failure.
        """
        # Check if we're stopping/stopped and the errback of the processor
        # deferred is just the cancelling we initiated.  If so, we skip
        # notifying via the _start_d deferred, as it will be 'callback'd at the
        # end of stop()
        if not (self._stopping and failure.check(CancelledError)):
            if self._start_d:  # Make sure we're not already stopped
                self._start_d.errback(failure)

    def _handle_fetch_error(self, failure):
        """A fetch request resulted in an error. Retry after our current delay

        When a fetch error occurs, we check to see if the Consumer is being
        stopped, and if so just return, trapping the CancelledError. If not, we
        check whether the Consumer has a non-zero setting for
        :attr:`request_retry_max_attempts` and, if we have reached that limit,
        we errback() the Consumer's start() deferred with the failure.
        Otherwise, we determine whether to log at debug or warning (we log at
        warning every other retry after backing off to the max retry delay,
        resulting in a warning message approximately once per minute with the
        default timings). We then wait our current :attr:`retry_delay` and
        retry the fetch, multiplying :attr:`retry_delay` by
        REQUEST_RETRY_FACTOR (Apéry's constant, ~1.20205) and noting the
        failed fetch by incrementing :attr:`_fetch_attempt_count`.

        NOTE: this may retry forever.
        TODO: Possibly make this differentiate based on the failure
        """
        # The _request_d deferred has fired, clear it.
        self._request_d = None
        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching messages from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return
        # Decide how to log this failure... If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other time
        # debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching messages from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching messages from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _handle_fetch_response(self, responses):
        """The callback handling the successful response from the fetch request

        Delivers the message list to the processor, handles per-message errors
        (ConsumerFetchSizeTooSmall), triggers another fetch request

        If the processor is still processing the last batch of messages, we
        defer this processing until it's done.  Otherwise, we start another
        fetch request and submit the messages to the processor
        """
        # Successful fetch, reset our retry delay
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        # Check to see if we are still processing the last block we fetched...
        if self._msg_block_d:
            # We are still working through the last block of messages...
            # We have to wait until it's done, then process this response
            self._msg_block_d.addCallback(
                lambda _: self._handle_fetch_response(responses))
            return

        # No ongoing processing, great, let's get some started.
        # Request no longer outstanding, clear the deferred tracker so we
        # can refetch
        self._request_d = None
        messages = []
        try:
            for resp in responses:  # We should really only ever get one...
                if resp.partition != self.partition:
                    log.warning(
                        "%r: Got response with partition: %r not our own: %r",
                        self, resp.partition, self.partition)
                    continue
                # resp.messages is a KafkaCodec._decode_message_set_iter
                # Note that 'message' here is really an OffsetAndMessage
                for message in resp.messages:
                    # Check for messages included which are from prior to our
                    # desired offset: can happen due to compressed message sets
                    if message.offset < self._fetch_offset:
                        log.debug(
                            'Skipping message at offset: %d, because its '
                            'offset is less than our fetch offset: %d.',
                            message.offset, self._fetch_offset)
                        continue
                    # Create a 'SourcedMessage' and add it to the messages list
                    messages.append(
                        SourcedMessage(
                            message=message.message,
                            offset=message.offset, topic=self.topic,
                            partition=self.partition))
                    # Update our notion of from where to fetch.
                    self._fetch_offset = message.offset + 1
        except ConsumerFetchSizeTooSmall:
            # A message was too large for us to receive, given our current
            # buffer size. Grow it until it works, or we hit our max
            # Grow by 16x up to 1MB (could result in 16MB buf), then by 2x
            factor = 2
            if self.buffer_size <= 2**20:
                factor = 16
            if self.max_buffer_size is None:
                # No limit, increase until we succeed or fail to alloc RAM
                self.buffer_size *= factor
            elif (self.max_buffer_size is not None and
                    self.buffer_size < self.max_buffer_size):
                # Limited, but currently below it.
                self.buffer_size = min(
                    self.buffer_size * factor, self.max_buffer_size)
            else:
                # We failed, and are already at our max. Nothing we can do but
                # create a Failure and errback() our start() deferred
                log.error("Max fetch size %d too small", self.max_buffer_size)
                failure = Failure(
                    ConsumerFetchSizeTooSmall(
                        "Max buffer size:%d too small for message",
                        self.max_buffer_size))
                self._start_d.errback(failure)
                return

            log.debug(
                "Next message larger than fetch size, increasing "
                "to %d (~2x) and retrying", self.buffer_size)

        finally:
            # If we were able to extract any messages, deliver them to the
            # processor now.
            if messages:
                self._msg_block_d = Deferred()
                self._process_messages(messages)

        # start another fetch, if needed, but use callLater to avoid recursion
        self._retry_fetch(0)

    def _process_messages(self, messages):
        """Send messages to the `processor` callback to be processed

        In the case we have a commit policy, we send messages to the processor
        in blocks no bigger than auto_commit_every_n (if set). Otherwise, we
        send the entire message block to be processed.

        """
        # Have we been told to shutdown?
        if self._shuttingdown:
            return
        # Do we have any messages to process?
        if not messages:
            # No, we're done with this block. If we had another fetch result
            # waiting, this callback will trigger the processing thereof.
            if self._msg_block_d:
                _msg_block_d, self._msg_block_d = self._msg_block_d, None
                _msg_block_d.callback(True)
            return
        # Yes, we've got some messages to process.
        # Default to processing the entire block...
        proc_block_size = sys.maxsize
        # Unless our auto commit_policy restricts us to process less
        if self.auto_commit_every_n:
            proc_block_size = self.auto_commit_every_n

        # Divide messages into two lists: one to process now, and remainder
        msgs_to_proc = messages[:proc_block_size]
        msgs_remainder = messages[proc_block_size:]
        # Call our processor callable and handle the possibility it returned
        # a deferred...
        last_offset = msgs_to_proc[-1].offset
        self._processor_d = d = maybeDeferred(self.processor, self, msgs_to_proc)
        log.debug('self.processor return: %r, last_offset: %r', d, last_offset)
        # Once the processor completes, clear our _processor_d
        d.addBoth(self._clear_processor_deferred)
        # Record the offset of the last processed message and check autocommit
        d.addCallback(self._update_processed_offset, last_offset)
        # If we were stopped, cancel the processor deferred. Note, we have to
        # do this here, in addition to in stop() because the processor func
        # itself could have called stop(), and then when it returned, we re-set
        # self._processor_d to the return of maybeDeferred().
        if self._stopping or self._start_d is None:
            d.cancel()
        else:
            # Setup to process the rest of our messages
            d.addCallback(lambda _: self._process_messages(msgs_remainder))
        # Add an error handler
        d.addErrback(self._handle_processor_error)

    def _do_fetch(self):
        """Send a fetch request if there isn't a request outstanding

        Sends a fetch request to the Kafka cluster to get messages at the
        current offset.  When the response comes back, if there are messages,
        it delivers them to the :attr:`processor` callback and initiates
        another fetch request.  If there is a recoverable error, the fetch is
        retried after :attr:`retry_delay`.

        In the case of an unrecoverable error, :func:`errback` is called on the
        :class:`Deferred` returned by :meth:`start()`.
        """
        # Check for outstanding request.
        if self._request_d:
            log.debug("_do_fetch: Outstanding request: %r", self._request_d)
            return

        # Cleanup our _retry_call, if we have one
        if self._retry_call is not None:
            if self._retry_call.active():
                self._retry_call.cancel()
            self._retry_call = None

        # Do we know our offset yet, or do we need to figure it out?
        if (self._fetch_offset == OFFSET_EARLIEST or
                self._fetch_offset == OFFSET_LATEST):
            # We need to fetch the offset for our topic/partition
            offset_request = OffsetRequest(
                self.topic, self.partition, self._fetch_offset, 1)
            self._request_d = self.client.send_offset_request([offset_request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        elif self._fetch_offset == OFFSET_COMMITTED:
            # We need to fetch the committed offset for our topic/partition
            # Note we use the same callbacks, as the responses are "close
            # enough" for our needs here
            if not self.consumer_group:
                # consumer_group must be set for OFFSET_COMMITTED
                failure = Failure(
                    InvalidConsumerGroupError("Bad Group_id:{0!r}".format(
                        self.consumer_group)))
                self._start_d.errback(failure)
                return
            request = OffsetFetchRequest(self.topic, self.partition)
            self._request_d = self.client.send_offset_fetch_request(
                self.consumer_group, [request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        else:
            # Create fetch request payload for our partition
            request = FetchRequest(
                self.topic, self.partition, self._fetch_offset,
                self.buffer_size)
            # Send request and add handlers for the response
            self._request_d = self.client.send_fetch_request(
                [request], max_wait_time=self.fetch_max_wait_time,
                min_bytes=self.fetch_min_bytes)
            # We need a temp for this because if the response is already
            # available, _handle_fetch_response() will clear self._request_d
            d = self._request_d
            d.addCallback(self._handle_fetch_response)
            d.addErrback(self._handle_fetch_error)

    def _commit_timer_failed(self, fail):
        """Handle an error in the commit() function

        Our commit() function, called by the LoopingCall, failed. Some error
        probably came back from Kafka and check_error() raised the exception.
        For now, just log the failure and restart the loop.
        """
        log.warning(
            '_commit_timer_failed: uncaught error %r: %s in _auto_commit',
            fail, fail.getBriefTraceback())
        self._commit_looper_d = self._commit_looper.start(
            self.auto_commit_every_s, now=False)

    def _commit_timer_stopped(self, lCall):
        """We're shutting down, clean up our looping call..."""
        if self._commit_looper is not lCall:
            log.warning('_commit_timer_stopped with wrong timer:%s not:%s',
                        lCall, self._commit_looper)
        else:
            log.debug('_commit_timer_stopped: %s %s', lCall,
                      self._commit_looper)
            self._commit_looper = None
            self._commit_looper_d = None
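
The Consumer class above is the library code itself; the sketch below shows
how its pieces fit together, following the usage notes in the class
docstring. It is illustrative only: the broker address, topic, partition,
and consumer group are placeholders, and it assumes afkak's usual public API
(KafkaClient and Consumer importable from the afkak package, the offset
constants from afkak.common, and the message payload bytes available as
SourcedMessage.message.value).

from twisted.internet import defer, task

from afkak import Consumer, KafkaClient
from afkak.common import OFFSET_EARLIEST


def processor(consumer, messages):
    # messages is a list of SourcedMessage(message, offset, topic, partition).
    # Returning a Deferred here provides backpressure; plain synchronous
    # processing is also fine.
    for msg in messages:
        print(msg.offset, msg.message.value)


@defer.inlineCallbacks
def consume_for_a_while(reactor):
    client = KafkaClient('localhost:9092')
    consumer = Consumer(client, 'example_topic', 0, processor,
                        consumer_group='example_group')
    start_d = consumer.start(OFFSET_EARLIEST)  # fires on stop or fatal error
    yield task.deferLater(reactor, 30.0, lambda: None)  # consume for 30s
    yield consumer.shutdown()  # finish processing, commit offsets, stop
    yield client.close()
    yield start_d


if __name__ == '__main__':
    task.react(consume_for_a_while)
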
Example #2
class Consumer(object):
    """A simple Kafka consumer implementation

    This consumer consumes a single partition from a single topic, optionally
    automatically committing offsets.  Use it as follows:

    - Create an instance of :class:`afkak.KafkaClient` with cluster
      connectivity details.
    - Create the :class:`Consumer`, supplying the client, topic, partition,
      processor function, and optionally fetch specifics, a consumer group,
      and a commit policy.
    - Call :meth:`.start` with the offset within the partition at which to
      start consuming messages. See :meth:`.start` for details.
    - Process the messages in your :attr:`.processor` callback, returning a
      :class:`~twisted.internet.defer.Deferred` to provide backpressure as
      needed.
    - Once processing resolves, :attr:`.processor` will be called again with
      the next batch of messages.
    - When desired, call :meth:`.stop` on the :class:`Consumer` to halt
      calls to the :attr:`processor` function and cancel any outstanding
      requests to the Kafka cluster.

    A :class:`Consumer` may be restarted once stopped.

    :ivar client:
        Connected :class:`KafkaClient` for submitting requests to the Kafka
        cluster.
    :ivar bytes topic:
        The topic from which to consume messages.
    :ivar int partition:
        The partition from which to consume.
    :ivar callable processor:
        The callback function to which the consumer and lists of messages
        (:class:`afkak.common.SourcedMessage`) will be submitted
        for processing.  The function may return
        a :class:`~twisted.internet.defer.Deferred` and will not be called
        again until this Deferred resolves.
    :ivar bytes consumer_group:
        Optional consumer group ID for committing offsets of processed
        messages back to Kafka.
    :ivar bytes commit_metadata:
        Optional metadata to store with the offset commit.
    :ivar int auto_commit_every_n:
        Number of messages after which the consumer will automatically
        commit the offset of the last processed message to Kafka. Zero
        disables auto-commit by message count. Defaults to
        :data:`AUTO_COMMIT_MSG_COUNT`.
    :ivar int auto_commit_every_ms:
        Time interval in milliseconds after which the consumer will
        automatically commit the offset of the last processed message to
        Kafka. Zero disables time-based auto-commit. Defaults to
        :data:`AUTO_COMMIT_INTERVAL`.
    :ivar int fetch_size_bytes:
        Number of bytes to request in a :class:`FetchRequest`.  Kafka will
        defer fulfilling the request until at least this many bytes can be
        returned.
    :ivar int fetch_max_wait_time:
        Max number of milliseconds the server should wait for that many
        bytes.
    :ivar int buffer_size:
        Initial number of bytes to tell Kafka we have available (default
        128K). When a fetched message is too large to fit, this is raised 16x
        at a time up to 1MB, then doubled, until it reaches
        :attr:`max_buffer_size` (if set).
    :ivar int max_buffer_size:
        Max number of bytes to tell Kafka we have available.  `None` means
        no limit (the default). Must be larger than the largest message we
        will find in our topic/partitions.
    :ivar float request_retry_init_delay:
        Number of seconds to wait before retrying a failed request to
        Kafka.
    :ivar float request_retry_max_delay:
        Maximum number of seconds to wait before retrying a failed request
        to Kafka (the delay is increased on each failure and reset to the
        initial delay upon success).
    :ivar int request_retry_max_attempts:
        Maximum number of attempts to make for any request. Default of zero
        means retry forever; other values must be positive and indicate
        the number of attempts to make before returning failure.

    :ivar int auto_offset_reset:
        What action should be taken when the broker responds to a fetch request
        with `OffsetOutOfRangeError`?

        - `OFFSET_EARLIEST`: request the oldest available messages. The
          consumer will read every message in the topic.
        - `OFFSET_LATEST`: request the most recent messages (this is the Java
          consumer's default). The consumer will read messages once new
          messages are produced to the topic.
        - `None`: fail on `OffsetOutOfRangeError` (Afkak's default). The
          `Deferred` returned by :meth:`Consumer.start()` will errback. The
          caller may call :meth:`~.start()` again with the desired offset.

        The broker returns `OffsetOutOfRangeError` when the client requests an
        offset that isn't valid. This may mean that the requested offset no
        longer exists, e.g. if it was removed due to age.
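
        As an illustrative sketch only (the client, topic, partition, group,
        and processor names here are placeholders, not part of this module),
        a consumer that falls back to the newest messages when its offset is
        out of range could be constructed as::

            consumer = Consumer(client, 'example_topic', 0, processor,
                                consumer_group='example_group',
                                auto_offset_reset=OFFSET_LATEST)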
    """
    def __init__(self, client, topic, partition, processor,
                 consumer_group=None,
                 commit_metadata=None,
                 auto_commit_every_n=None,
                 auto_commit_every_ms=None,
                 fetch_size_bytes=FETCH_MIN_BYTES,
                 fetch_max_wait_time=FETCH_MAX_WAIT_TIME,
                 buffer_size=FETCH_BUFFER_SIZE_BYTES,
                 max_buffer_size=None,
                 request_retry_init_delay=REQUEST_RETRY_MIN_DELAY,
                 request_retry_max_delay=REQUEST_RETRY_MAX_DELAY,
                 request_retry_max_attempts=0,
                 auto_offset_reset=None):
        # Store away parameters
        self.client = client  # KafkaClient
        self.topic = topic = _coerce_topic(topic)
        self.partition = partition  # The partition within the topic we consume
        self.processor = processor  # The callback we call with the msg list
        # Commit related parameters (Ensure the attr. exist, even if None)
        if consumer_group is not None:
            consumer_group = _coerce_consumer_group(consumer_group)
        self.consumer_group = consumer_group
        self.commit_metadata = commit_metadata
        if commit_metadata is not None and not isinstance(commit_metadata, bytes):
            raise TypeError('commit_metadata={!r} should be bytes'.format(
                commit_metadata))
        self.auto_commit_every_n = None
        self.auto_commit_every_s = None
        if consumer_group:
            # Auto committing is possible...
            if auto_commit_every_n is None:
                auto_commit_every_n = AUTO_COMMIT_MSG_COUNT
            if auto_commit_every_ms is None:
                auto_commit_every_ms = AUTO_COMMIT_INTERVAL
            if not isinstance(auto_commit_every_n, Integral):
                raise ValueError('auto_commit_every_n parameter must be '
                                 'subtype of Integral')
            if not isinstance(auto_commit_every_ms, Integral):
                raise ValueError('auto_commit_every_ms parameter must be '
                                 'subtype of Integral')
            if auto_commit_every_ms < 0 or auto_commit_every_n < 0:
                raise ValueError('auto_commit_every_ms and auto_commit_every_n'
                                 ' must be non-negative')
            self.auto_commit_every_n = auto_commit_every_n
            self.auto_commit_every_s = float(auto_commit_every_ms) / 1000
        else:
            if auto_commit_every_ms or auto_commit_every_n:
                raise ValueError('An auto_commit_every_x argument set without '
                                 'specifying consumer_group')
        # Fetch related instance variables
        self.fetch_min_bytes = fetch_size_bytes
        self.fetch_max_wait_time = int(fetch_max_wait_time)
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        # request retry timing
        self.retry_delay = float(request_retry_init_delay)  # fetch only
        self.retry_init_delay = float(request_retry_init_delay)
        self.retry_max_delay = float(request_retry_max_delay)
        self.request_retry_max_attempts = int(request_retry_max_attempts)
        if (not isinstance(request_retry_max_attempts, Integral) or
                request_retry_max_attempts < 0):
            raise ValueError(
                'request_retry_max_attempts must be non-negative integer')
        self._fetch_attempt_count = 1
        if auto_offset_reset not in [None, OFFSET_EARLIEST, OFFSET_LATEST]:
            raise ValueError(
                "auto_offset_reset must be in 'None', 'OFFSET_EARLIEST', 'OFFSET_LATEST'")
        self.auto_offset_reset = auto_offset_reset
        # # Internal state tracking attributes
        self._fetch_offset = None  # We don't know at what offset to fetch yet
        self._last_processed_offset = None  # Last msg processed offset
        self._last_committed_offset = None  # The last offset stored in Kafka
        self._stopping = False  # We're not stopping yet...
        self._shuttingdown = False  # We're not shutting down either
        self._shutdown_d = None  # deferred for tracking shutdown request
        self._commit_looper = None  # Looping call for auto-commit
        self._commit_looper_d = None  # Deferred for running looping call
        self._commit_ds = []  # Deferreds to notify when commit completes
        self._commit_req = None  # Track outstanding commit request
        # For tracking various async operations
        self._start_d = None  # deferred for alerting user of errors
        self._request_d = None  # outstanding KafkaClient request deferred
        self._retry_call = None  # IDelayedCall object for delayed retries
        self._commit_call = None  # IDelayedCall for delayed commit retries
        self._msg_block_d = None  # deferred for each block of messages
        self._processor_d = None  # deferred for a result from processor
        self._state = '[initialized]'  # Keep track of state for debugging
        # Check parameters for sanity
        if max_buffer_size is not None and buffer_size > max_buffer_size:
            raise ValueError("buffer_size (%d) is greater than "
                             "max_buffer_size (%d)" %
                             (buffer_size, max_buffer_size))
        if not isinstance(self.partition, Integral):
            raise ValueError('partition parameter must be subtype of Integral')

    def __repr__(self):
        return '<{} {} topic={}, partition={}, processor={}>'.format(
            self.__class__.__name__, self._state,
            self.topic, self.partition, self.processor,
        )

    def start(self, start_offset):
        """
        Starts fetching messages from Kafka and delivering them to the
        :attr:`.processor` function.

        :param int start_offset:
            The offset within the partition from which to start fetching.
            Special values include: :const:`OFFSET_EARLIEST`,
            :const:`OFFSET_LATEST`, and :const:`OFFSET_COMMITTED`. If the
            supplied offset is :const:`OFFSET_EARLIEST` or
            :const:`OFFSET_LATEST` the :class:`Consumer` will use the
            OffsetRequest Kafka API to retrieve the actual offset used for
            fetching. In the case :const:`OFFSET_COMMITTED` is used,
            a `consumer_group` MUST be set on the Consumer, and the Consumer
            will use the OffsetFetchRequest Kafka API to retrieve the actual
            offset used for fetching.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which will resolve
            successfully when the consumer is cleanly stopped, or with
            a failure if the :class:`Consumer` encounters an error from which
            it is unable to recover.

        :raises: :exc:`RestartError` if already running.
        """
        # Have we been started already, and not stopped?
        if self._start_d is not None:
            raise RestartError("Start called on already-started consumer")

        # Keep track of state for debugging
        self._state = '[started]'

        # Create and return a deferred for alerting on errors/stoppage
        start_d = self._start_d = Deferred()

        # Start a new fetch request, possibly just for the starting offset
        self._fetch_offset = start_offset
        self._do_fetch()

        # Set up the auto-commit timer, if needed
        if self.consumer_group and self.auto_commit_every_s:
            self._commit_looper = LoopingCall(self._auto_commit)
            self._commit_looper.clock = self.client.reactor
            self._commit_looper_d = self._commit_looper.start(
                self.auto_commit_every_s, now=False)
            self._commit_looper_d.addCallbacks(self._commit_timer_stopped,
                                               self._commit_timer_failed)
        return start_d

    def shutdown(self):
        """Gracefully shutdown the consumer

        Consumer will complete any outstanding processing, commit its current
        offsets (if so configured) and stop.

        Returns a deferred which fires with a tuple of
        (last processed offset, last committed offset) if it was able to
        commit successfully, errbacks with the commit failure, if any, or
        fails with RestopError if the consumer is not running.
        """
        def _handle_shutdown_commit_success(result):
            """Handle the result of the commit attempted by shutdown"""
            self._shutdown_d, d = None, self._shutdown_d
            self.stop()
            self._shuttingdown = False  # Shutdown complete
            d.callback((self._last_processed_offset,
                       self._last_committed_offset))

        def _handle_shutdown_commit_failure(failure):
            """Handle failure of commit() attempted by shutdown"""
            if failure.check(OperationInProgress):
                failure.value.deferred.addCallback(_commit_and_stop)
                return

            self._shutdown_d, d = None, self._shutdown_d
            self.stop()
            self._shuttingdown = False  # Shutdown complete
            d.errback(failure)

        def _commit_and_stop(result):
            """Commit the current offsets (if needed) and stop the consumer"""
            if not self.consumer_group:  # No consumer group, no committing
                return _handle_shutdown_commit_success(None)

            # Need to commit prior to stopping
            self.commit().addCallbacks(_handle_shutdown_commit_success,
                                       _handle_shutdown_commit_failure)

        # If we're not running, return a failure
        if self._start_d is None:
            return fail(Failure(
                RestopError("Shutdown called on non-running consumer")))
        # If we're called multiple times, return a failure
        if self._shutdown_d:
            return fail(Failure(
                RestopError("Shutdown called more than once.")))
        # Set our _shuttingdown flag, so our _process_message routine will stop
        # feeding new messages to the processor, and fetches won't be retried
        self._shuttingdown = True
        # Keep track of state for debugging
        self._state = '[shutting down]'

        # Create a deferred to track the shutdown
        self._shutdown_d = d = Deferred()

        # Are we waiting for the processor to complete? If so, when it's done,
        # commit our offsets and stop.
        if self._processor_d:
            self._processor_d.addCallback(_commit_and_stop)
        else:
            # No need to wait for the processor, we can commit and stop now
            _commit_and_stop(None)

        # return the deferred
        return d

    def stop(self):
        """
        Stop the consumer and return the offset of the last processed message.
        This cancels all outstanding operations.  Also, if the deferred
        returned by `start` hasn't yet fired, it is fired with a tuple of the
        last processed offset and the last committed offset.

        :raises: :exc:`RestopError` if the :class:`Consumer` is not running.
        """
        if self._start_d is None:
            raise RestopError("Stop called on non-running consumer")

        self._stopping = True
        # Keep track of state for debugging
        self._state = '[stopping]'
        # Are we waiting for a request to come back?
        if self._request_d:
            self._request_d.cancel()
        # Are we working our way through a block of messages?
        if self._msg_block_d:
            # Need to add a cancel handler...
            _msg_block_d, self._msg_block_d = self._msg_block_d, None
            _msg_block_d.addErrback(lambda fail: fail.trap(CancelledError))
            _msg_block_d.cancel()
        # Are we waiting for the processor to complete?
        if self._processor_d:
            self._processor_d.cancel()
        # Are we waiting to retry a request?
        if self._retry_call:
            self._retry_call.cancel()
        # Are we waiting on a commit request?
        if self._commit_ds:
            while self._commit_ds:
                d = self._commit_ds.pop()
                d.cancel()
        if self._commit_req:
            self._commit_req.cancel()
        # Are we waiting to retry a commit?
        if self._commit_call:
            self._commit_call.cancel()
        # Do we have an auto-commit looping call?
        if self._commit_looper is not None:
            self._commit_looper.stop()
        # Done stopping
        self._stopping = False
        # Keep track of state for debugging
        self._state = '[stopped]'

        # Clear and possibly callback our start() Deferred
        self._start_d, d = None, self._start_d
        if not d.called:
            d.callback((self._last_processed_offset,
                        self._last_committed_offset))

        # Return the offset of the message we last processed
        return self._last_processed_offset

    def commit(self):
        """
        Commit the offset of the message we last processed if it is different
        from what we believe is the last offset committed to Kafka.

        .. note::

            It is possible to commit a smaller offset than Kafka has stored.
            This is by design, so we can reprocess a Kafka message stream if
            desired.

        On error, will retry according to :attr:`request_retry_max_attempts`
        (by default, forever).

        If called while a commit operation is in progress, and new messages
        have been processed since the last request was sent then the commit
        will fail with :exc:`OperationInProgress`.  The
        :exc:`OperationInProgress` exception wraps
        a :class:`~twisted.internet.defer.Deferred` which fires when the
        outstanding commit operation completes.

        :returns:
            A :class:`~twisted.internet.defer.Deferred` which resolves with the
            committed offset when the operation has completed.  It will resolve
            immediately if the current offset and the last committed offset do
            not differ.
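
        A hedged sketch of handling :exc:`OperationInProgress` (``consumer``
        is a running :class:`Consumer` configured with a ``consumer_group``)::

            def _commit_blocked(failure):
                failure.trap(OperationInProgress)
                # Wait for the in-flight commit, then try again.
                return failure.value.deferred.addCallback(
                    lambda _: consumer.commit())

            consumer.commit().addErrback(_commit_blocked)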
        """
        # Can't commit without a consumer_group
        if not self.consumer_group:
            return fail(Failure(InvalidConsumerGroupError(
                "Bad Group_id:{0!r}".format(self.consumer_group))))
        # short circuit if we are 'up to date', or haven't processed anything
        if ((self._last_processed_offset is None) or
                (self._last_processed_offset == self._last_committed_offset)):
            return succeed(self._last_committed_offset)

        # If we're currently processing a commit we return a failure
        # with a deferred we'll fire when the in-progress one completes
        if self._commit_ds:
            d = Deferred()
            self._commit_ds.append(d)
            return fail(OperationInProgress(d))

        # Ok, we have processed messages since our last commit attempt, and
        # we're not currently waiting on a commit request to complete:
        # Start a new one
        d = Deferred()
        self._commit_ds.append(d)

        # Send the request
        self._send_commit_request()

        # Reset the commit_looper here, rather than on success to give
        # more stability to the commit interval.
        if self._commit_looper is not None:
            self._commit_looper.reset()

        # return the deferred
        return d

    # # Private Methods # #

    def _retry_auto_commit(self, result, by_count=False):
        self._auto_commit(by_count)
        return result

    def _auto_commit(self, by_count=False):
        """Check if we should start a new commit operation and commit"""
        # Check if we are even supposed to do any auto-committing
        if (self._stopping or self._shuttingdown or (not self._start_d) or
                (self._last_processed_offset is None) or
                (not self.consumer_group) or
                (by_count and not self.auto_commit_every_n)):
            return

        # If we're auto_committing because the timer expired, or by count and
        # we don't have a record of our last_committed_offset, or we've
        # processed enough messages since our last commit, then try to commit
        if (not by_count or self._last_committed_offset is None or
            (self._last_processed_offset - self._last_committed_offset
             ) >= self.auto_commit_every_n):
            if not self._commit_ds:
                commit_d = self.commit()
                commit_d.addErrback(self._handle_auto_commit_error)
            else:
                # We're waiting on the last commit to complete, so add a
                # callback to be called when the current request completes
                d = Deferred()
                d.addCallback(self._retry_auto_commit, by_count)
                self._commit_ds.append(d)

    def _retry_fetch(self, after=None):
        """
        Schedule a delayed :meth:`_do_fetch` call after a failure

        :param float after:
            The delay in seconds after which to do the retried fetch. If
            `None`, our internal :attr:`retry_delay` is used, and adjusted by
            :const:`REQUEST_RETRY_FACTOR`.
        """

        # Have we been told to stop or shutdown?  Then don't actually retry.
        if self._stopping or self._shuttingdown or self._start_d is None:
            # Stopping, or stopped already? No more fetching.
            return
        if self._retry_call is None:
            if after is None:
                after = self.retry_delay
                self.retry_delay = min(self.retry_delay * REQUEST_RETRY_FACTOR,
                                       self.retry_max_delay)

            self._fetch_attempt_count += 1
            self._retry_call = self.client.reactor.callLater(
                after, self._do_fetch)

    def _handle_offset_response(self, response):
        """
        Handle responses to both OffsetRequest and OffsetFetchRequest, since
        they are similar enough.

        :param response:
            A tuple of a single OffsetFetchResponse or OffsetResponse
        """
        # Got a response, clear our outstanding request deferred
        self._request_d = None

        # Successful request, reset our retry delay, count, etc
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        response = response[0]
        if hasattr(response, 'offsets'):
            # It's a response to an OffsetRequest
            self._fetch_offset = response.offsets[0]
        else:
            # It's a response to an OffsetFetchRequest
            # Make sure we got a valid offset back. Kafka uses -1 to indicate
            # no committed offset was retrieved
            if response.offset == OFFSET_NOT_COMMITTED:
                self._fetch_offset = OFFSET_EARLIEST
            else:
                self._fetch_offset = response.offset + 1
                self._last_committed_offset = response.offset
        self._do_fetch()

    def _handle_offset_error(self, failure):
        """
        Retry the offset fetch request if appropriate.

        Once the :attr:`.retry_delay` reaches our :attr:`.retry_max_delay`, we
        log a warning.  This should perhaps be extended to abort sooner on
        certain errors.
        """
        # outstanding request got errback'd, clear it
        self._request_d = None

        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching offset from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return
        # Decide how to log this failure... If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other
        # time, and at debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching offset from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching offset from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _clear_processor_deferred(self, result):
        self._processor_d = None  # It has fired, we can clear it
        return result

    def _update_processed_offset(self, result, offset):
        self._last_processed_offset = offset
        self._auto_commit(by_count=True)

    def _clear_commit_req(self, result):
        self._commit_req = None  # It has fired, we can clear it
        return result

    def _update_committed_offset(self, result, offset):
        # successful commit request completed
        self._last_committed_offset = offset
        self._deliver_commit_result(offset)
        return offset

    def _deliver_commit_result(self, result):
        # Let anyone waiting know the commit completed. Handle the case where
        # they try to commit from the callback by preserving self._commit_ds
        # as a local, but clearing the attribute itself.
        commit_ds, self._commit_ds = self._commit_ds, []
        while commit_ds:
            d = commit_ds.pop()
            d.callback(result)

    def _send_commit_request(self, retry_delay=None, attempt=None):
        """Send a commit request with our last_processed_offset"""
        # If there's a _commit_call, and it's not active, clear it, it probably
        # just called us...
        if self._commit_call and not self._commit_call.active():
            self._commit_call = None

        # Make sure we only have one outstanding commit request at a time
        if self._commit_req is not None:
            raise OperationInProgress(self._commit_req)

        # Handle defaults
        if retry_delay is None:
            retry_delay = self.retry_init_delay
        if attempt is None:
            attempt = 1

        # Create new OffsetCommitRequest with the latest processed offset
        commit_offset = self._last_processed_offset
        commit_request = OffsetCommitRequest(
            self.topic, self.partition, commit_offset,
            TIMESTAMP_INVALID, self.commit_metadata)
        log.debug("Committing off=%d grp=%s tpc=%s part=%s req=%r",
                  self._last_processed_offset, self.consumer_group,
                  self.topic, self.partition, commit_request)

        # Send the request, add our callbacks
        self._commit_req = d = self.client.send_offset_commit_request(
            self.consumer_group, [commit_request])

        d.addBoth(self._clear_commit_req)
        d.addCallbacks(
            self._update_committed_offset, self._handle_commit_error,
            callbackArgs=(commit_offset,),
            errbackArgs=(retry_delay, attempt))

    def _handle_commit_error(self, failure, retry_delay, attempt):
        """ Retry the commit request, depending on failure type

        Depending on the type of the failure, we retry the commit request
        with the latest processed offset, or callback/errback self._commit_ds
        """
        # Check if we are stopping and the request was cancelled
        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return self._deliver_commit_result(self._last_committed_offset)

        # Check that the failure type is a Kafka error...this could maybe be
        # a tighter check to determine whether a retry will succeed...
        if not failure.check(KafkaError):
            log.error("Unhandleable failure during commit attempt: %r\n\t%r",
                      failure, failure.getBriefTraceback())
            return self._deliver_commit_result(failure)

        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                attempt >= self.request_retry_max_attempts):
            log.debug("%r: Exhausted attempts: %d to commit offset: %r",
                      self, self.request_retry_max_attempts, failure)
            return self._deliver_commit_result(failure)

        # Check the retry_delay to see if we should log at the higher level
        # Using attempt % 2 gets us one warning per minute with default timings
        if retry_delay < self.retry_max_delay or 0 == (attempt % 2):
            log.debug("%r: Failure committing offset to kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log alternately at warn
            log.warning("%r: Still failing committing offset to kafka: %r",
                        self, failure)

        # Schedule a delayed call to retry the commit
        retry_delay = min(retry_delay * REQUEST_RETRY_FACTOR,
                          self.retry_max_delay)
        self._commit_call = self.client.reactor.callLater(
            retry_delay, self._send_commit_request, retry_delay, attempt + 1)

    def _handle_auto_commit_error(self, failure):
        if self._start_d is not None and not self._start_d.called:
            self._start_d.errback(failure)

    def _handle_processor_error(self, failure):
        """Handle a failure in the processing of a block of messages

        This method is called when the processor func fails while processing
        a block of messages. Since we can't know how best to handle a
        processor failure, we just :func:`errback` our :func:`start` method's
        deferred to let our user know about the failure.
        """
        # Check if we're stopping/stopped and the errback of the processor
        # deferred is just the cancelling we initiated.  If so, we skip
        # notifying via the _start_d deferred, as it will be 'callback'd at the
        # end of stop()
        if not (self._stopping and failure.check(CancelledError)):
            if self._start_d:  # Make sure we're not already stopped
                self._start_d.errback(failure)

    def _handle_fetch_error(self, failure):
        """A fetch request resulted in an error. Retry after our current delay

        When a fetch error occurs, we check whether the Consumer is being
        stopped, and if so just return, trapping the CancelledError. If not,
        and the Consumer has a non-zero :attr:`request_retry_max_attempts`
        which we have reached, we errback() the Consumer's start() deferred
        with the failure. Otherwise we determine whether to log at debug or
        warning (we log at warning every other retry after backing off to the
        max retry delay, resulting in a warning message approximately once per
        minute with the default timings). We then wait our current
        :attr:`retry_delay` and retry the fetch, multiplying
        :attr:`retry_delay` by :const:`REQUEST_RETRY_FACTOR` (Apéry's
        constant, ~1.20205) and noting the failed fetch by incrementing
        :attr:`_fetch_attempt_count`.

        NOTE: this may retry forever.
        TODO: Possibly make this differentiate based on the failure
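
        Backoff sketch (illustrative only; assumes a 1 second initial delay
        and a 30 second cap, which may differ from this Consumer's settings)::

            delay = 1.0
            delays = []
            for _ in range(5):
                delay = min(delay * 1.20205, 30.0)
                delays.append(round(delay, 3))
            # delays == [1.202, 1.445, 1.737, 2.088, 2.51]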
        """
        # The _request_d deferred has fired, clear it.
        self._request_d = None

        if failure.check(OffsetOutOfRangeError):
            if self.auto_offset_reset is None:
                self._start_d.errback(failure)
                return
            self._fetch_offset = self.auto_offset_reset

        if self._stopping and failure.check(CancelledError):
            # Not really an error
            return
        # Do we need to abort?
        if (self.request_retry_max_attempts != 0 and
                self._fetch_attempt_count >= self.request_retry_max_attempts):
            log.debug(
                "%r: Exhausted attempts: %d fetching messages from kafka: %r",
                self, self.request_retry_max_attempts, failure)
            self._start_d.errback(failure)
            return

        # Decide how to log this failure... If we have retried so many times
        # we're at the retry_max_delay, then we log at warning every other
        # time, and at debug otherwise
        if (self.retry_delay < self.retry_max_delay or
                0 == (self._fetch_attempt_count % 2)):
            log.debug("%r: Failure fetching messages from kafka: %r", self,
                      failure)
        else:
            # We've retried until we hit the max delay, log at warn
            log.warning("%r: Still failing fetching messages from kafka: %r",
                        self, failure)
        self._retry_fetch()

    def _handle_fetch_response(self, responses):
        """The callback handling the successful response from the fetch request

        Delivers the message list to the processor, handles per-message errors
        (ConsumerFetchSizeTooSmall), triggers another fetch request

        If the processor is still processing the last batch of messages, we
        defer this processing until it's done.  Otherwise, we start another
        fetch request and submit the messages to the processor
        """
        # Successful fetch, reset our retry delay
        self.retry_delay = self.retry_init_delay
        self._fetch_attempt_count = 1

        # Check to see if we are still processing the last block we fetched...
        if self._msg_block_d:
            # We are still working through the last block of messages...
            # We have to wait until it's done, then process this response
            self._msg_block_d.addCallback(
                lambda _: self._handle_fetch_response(responses))
            return

        # No ongoing processing, great, let's get some started.
        # Request no longer outstanding, clear the deferred tracker so we
        # can refetch
        self._request_d = None
        messages = []
        try:
            for resp in responses:  # We should really only ever get one...
                if resp.partition != self.partition:
                    log.warning(
                        "%r: Got response with partition: %r not our own: %r",
                        self, resp.partition, self.partition)
                    continue
                # resp.messages is a KafkaCodec._decode_message_set_iter
                # Note that 'message' here is really an OffsetAndMessage
                for message in resp.messages:
                    # Check for messages included which are from prior to our
                    # desired offset: can happen due to compressed message sets
                    if message.offset < self._fetch_offset:
                        log.debug(
                            'Skipping message at offset: %d, because its '
                            'offset is less than our fetch offset: %d.',
                            message.offset, self._fetch_offset)
                        continue
                    # Create a 'SourcedMessage' and add it to the messages list
                    messages.append(
                        SourcedMessage(
                            message=message.message,
                            offset=message.offset, topic=self.topic,
                            partition=self.partition))
                    # Update our notion of from where to fetch.
                    self._fetch_offset = message.offset + 1
        except ConsumerFetchSizeTooSmall:
            # A message was too large for us to receive, given our current
            # buffer size. Grow it until it works, or we hit our max
            # Grow by 16x up to 1MB (could result in 16MB buf), then by 2x
            factor = 2
            if self.buffer_size <= 2**20:
                factor = 16
            if self.max_buffer_size is None:
                # No limit, increase until we succeed or fail to alloc RAM
                self.buffer_size *= factor
            elif self.buffer_size < self.max_buffer_size:
                # Limited, but currently below it.
                self.buffer_size = min(
                    self.buffer_size * factor, self.max_buffer_size)
            else:
                # We failed, and are already at our max. Nothing we can do but
                # create a Failure and errback() our start() deferred
                log.error("Max fetch size %d too small", self.max_buffer_size)
                failure = Failure(
                    ConsumerFetchSizeTooSmall(
                        "Max buffer size:%d too small for message",
                        self.max_buffer_size))
                self._start_d.errback(failure)
                return

            log.debug(
                "Next message larger than fetch size, increasing "
                "to %d (~2x) and retrying", self.buffer_size)

        finally:
            # If we were able to extract any messages, deliver them to the
            # processor now.
            if messages:
                self._msg_block_d = Deferred()
                self._process_messages(messages)

        # start another fetch, if needed, but use callLater to avoid recursion
        self._retry_fetch(0)

    def _process_messages(self, messages):
        """Send messages to the `processor` callback to be processed

        In the case we have a commit policy, we send messages to the processor
        in blocks no bigger than auto_commit_every_n (if set). Otherwise, we
        send the entire message block to be processed.

        """
        # Have we been told to shutdown?
        if self._shuttingdown:
            return
        # Do we have any messages to process?
        if not messages:
            # No, we're done with this block. If we had another fetch result
            # waiting, this callback will trigger the processing thereof.
            if self._msg_block_d:
                _msg_block_d, self._msg_block_d = self._msg_block_d, None
                _msg_block_d.callback(True)
            return
        # Yes, we've got some messages to process.
        # Default to processing the entire block...
        proc_block_size = sys.maxsize
        # Unless our auto commit_policy restricts us to process less
        if self.auto_commit_every_n:
            proc_block_size = self.auto_commit_every_n

        # Divide messages into two lists: one to process now, and remainder
        msgs_to_proc = messages[:proc_block_size]
        msgs_remainder = messages[proc_block_size:]
        # Call our processor callable and handle the possibility it returned
        # a deferred...
        last_offset = msgs_to_proc[-1].offset
        self._processor_d = d = maybeDeferred(self.processor, self, msgs_to_proc)
        log.debug('self.processor return: %r, last_offset: %r', d, last_offset)
        # Once the processor completes, clear our _processor_d
        d.addBoth(self._clear_processor_deferred)
        # Record the offset of the last processed message and check autocommit
        d.addCallback(self._update_processed_offset, last_offset)
        # If we were stopped, cancel the processor deferred. Note, we have to
        # do this here, in addition to in stop() because the processor func
        # itself could have called stop(), and then when it returned, we re-set
        # self._processor_d to the return of maybeDeferred().
        if self._stopping or self._start_d is None:
            d.cancel()
        else:
            # Setup to process the rest of our messages
            d.addCallback(lambda _: self._process_messages(msgs_remainder))
        # Add an error handler
        d.addErrback(self._handle_processor_error)

    def _do_fetch(self):
        """Send a fetch request if there isn't a request outstanding

        Sends a fetch request to the Kafka cluster to get messages at the
        current offset.  When the response comes back, if there are messages,
        it delivers them to the :attr:`processor` callback and initiates
        another fetch request.  If there is a recoverable error, the fetch is
        retried after :attr:`retry_delay`.

        In the case of an unrecoverable error, :func:`errback` is called on the
        :class:`Deferred` returned by :meth:`start()`.
        """
        # Check for outstanding request.
        if self._request_d:
            log.debug("_do_fetch: Outstanding request: %r", self._request_d)
            return

        # Cleanup our _retry_call, if we have one
        if self._retry_call is not None:
            if self._retry_call.active():
                self._retry_call.cancel()
            self._retry_call = None

        # Do we know our offset yet, or do we need to figure it out?
        if (self._fetch_offset == OFFSET_EARLIEST or
                self._fetch_offset == OFFSET_LATEST):
            # We need to fetch the offset for our topic/partition
            offset_request = OffsetRequest(
                self.topic, self.partition, self._fetch_offset, 1)
            self._request_d = self.client.send_offset_request([offset_request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        elif self._fetch_offset == OFFSET_COMMITTED:
            # We need to fetch the committed offset for our topic/partition
            # Note we use the same callbacks, as the responses are "close
            # enough" for our needs here
            if not self.consumer_group:
                # consumer_group must be set for OFFSET_COMMITTED
                failure = Failure(
                    InvalidConsumerGroupError("Bad Group_id:{0!r}".format(
                        self.consumer_group)))
                self._start_d.errback(failure)
                return
            request = OffsetFetchRequest(self.topic, self.partition)
            self._request_d = self.client.send_offset_fetch_request(
                self.consumer_group, [request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        else:
            # Create fetch request payload for our partition
            request = FetchRequest(
                self.topic, self.partition, self._fetch_offset,
                self.buffer_size)
            # Send request and add handlers for the response
            self._request_d = self.client.send_fetch_request(
                [request], max_wait_time=self.fetch_max_wait_time,
                min_bytes=self.fetch_min_bytes)
            # We need a temp for this because if the response is already
            # available, _handle_fetch_response() will clear self._request_d
            d = self._request_d
            d.addCallback(self._handle_fetch_response)
            d.addErrback(self._handle_fetch_error)

    def _commit_timer_failed(self, fail):
        """Handle an error in the commit() function

        Our commit() function called by the LoopingCall failed. Some error
        probably came back from Kafka and _check_error() raised the exception
        For now, just log the failure and restart the loop
        """
        log.warning(
            '_commit_timer_failed: uncaught error %r: %s in _auto_commit',
            fail, fail.getBriefTraceback())
        self._commit_looper_d = self._commit_looper.start(
            self.auto_commit_every_s, now=False)

    def _commit_timer_stopped(self, lCall):
        """We're shutting down, clean up our looping call..."""
        if self._commit_looper is not lCall:
            log.warning('_commit_timer_stopped with wrong timer:%s not:%s',
                        lCall, self._commit_looper)
        else:
            log.debug('_commit_timer_stopped: %s %s', lCall,
                      self._commit_looper)
            self._commit_looper = None
            self._commit_looper_d = None
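

# Hedged end-to-end sketch (not part of the original example): wiring the
# Consumer above to an afkak KafkaClient and the Twisted reactor.  The broker
# address, topic name, `printer` callback and KafkaClient keyword arguments
# are illustrative assumptions.
def _example_consume_one_partition():
    from afkak import KafkaClient
    from twisted.internet import reactor

    def printer(consumer, messages):
        # messages are SourcedMessage instances; .message is the Kafka Message
        for msg in messages:
            print(msg.offset, msg.message.value)

    client = KafkaClient(hosts='localhost:9092')
    consumer = Consumer(client, b'example-topic', 0, printer)
    d = consumer.start(OFFSET_EARLIEST)
    d.addErrback(lambda failure: log.error("consumer failed: %s", failure))
    reactor.run()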
Beispiel #3
0
class NetworkService:
    """
    Prisma Network.
    """
    def __init__(self):
        self.logger = logging.getLogger('Network')
        self.status = STATUS_INIT
        self.callLater = Prisma().callLater
        self.reactor = reactor
        self.factory = NetworkFactory()
        self.listen_port = CONFIG.getint('network', 'listen_port')
        self.listener = None
        self.node_id = self.get_node_id()
        self.get_peers_lc = None
        self.get_events_lc = None
        self.get_peers_timer = CONFIG.getint('network', 'get_peers_timer')
        self.get_events_timer = CONFIG.getint('network', 'get_events_timer')
        self.timeout = CONFIG.getint('network', 'timeout')

    def start(self):
        """
        Start the network service: listen on the configured port, bootstrap
        from the configured nodes, and start the looping calls that
        periodically ask random peers for peers and events.
        """
        # listen to the port
        self.listener = self.reactor.listenTCP(self.listen_port, self.factory)

        # bootstrap
        self.bootstrap()

        # start get_peers looping call
        self.get_peers_lc = LoopingCall(
            lambda: self.get_peers_from_random_peer())
        self.get_peers_lc.start(self.get_peers_timer)

        # start get_events looping call
        self.get_events_lc = LoopingCall(
            lambda: self.get_events_from_random_peer())
        self.get_events_lc.start(self.get_events_timer)

    def stop(self):
        """
        Close connections.
        """
        try:
            if self.get_peers_lc is not None:
                self.get_peers_lc.stop()
            if self.get_events_lc is not None:
                self.get_events_lc.stop()
            self.listener.stopListening()
        except Exception as e:
            self.logger.critical('Error while stopping Prisma network: ' +
                                 str(e))

    def bootstrap(self):
        """
        Delete all peers and bootstrap the peers in the config file by connecting and asking
        them the peers they know.
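
        The expected configuration shape (hosts and ports are illustrative)::

            [bootstrap]
            bootstrap_nodes = ["10.0.0.1:19001", "10.0.0.2:19001"]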
        """
        self.status = STATUS_BOOTSTRAPPING

        # delete first all peers
        Prisma().db.delete_peers()

        # bootstrap each of the nodes
        bootstrap_nodes = json.loads(CONFIG.get('bootstrap',
                                                'bootstrap_nodes'))
        for bootstrap in bootstrap_nodes:
            host, port = bootstrap.split(":")
            self.bootstrap_peer(host, int(port))

        # get state from a random node
        self.download_state_from_random_peer()

    def bootstrap_peer(self, host, port):
        """
        Connect to the given node and call send_get_peers(); if that succeeds,
        the node is added to the available peers.

        :param host: host to connect to
        :param port: port to connect to
        """
        self.logger.info('Bootstrapping peer {0}:{1}'.format(host, str(port)))
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_peers and call send_get_peers
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_peers_ok(_):
                self.logger.info('Successfully got peers from {0}:{1}'.format(
                    host, port))

            protocol.d.addCallback(get_peers_ok)

            def get_peers_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting peers from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()

            protocol.d.addErrback(get_peers_error)

            protocol.send_get_peers()

        d.addCallback(connection_ok)

        # in case of connection error just show in debug
        def connection_error(reason):
            self.logger.warning(
                'Error while connecting to {0}:{1}: {2}'.format(
                    host, port, reason.getErrorMessage()))

        d.addErrback(connection_error)

    def download_state_from_random_peer(self):
        """
        This will get the state of a random peer.
        """
        random_peer = Prisma().db.get_random_peer()

        # if list is empty
        if not random_peer:
            self.logger.info(
                'No peers to connect to. Wait for a peer to connect to you or restart with a known peer.'
            )
            self.callLater(2, lambda: self.download_state_from_random_peer())
            return

        random_peer = random_peer.pop()
        host = random_peer['host']
        port = random_peer['port']
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_state
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_state_ok(_):
                self.logger.info(
                    'Successfully got the state from {0}:{1}'.format(
                        host, port))
                self.status = STATUS_READY

            protocol.d.addCallback(get_state_ok)

            def get_state_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting state from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
                self.callLater(0,
                               lambda: self.download_state_from_random_peer())

            protocol.d.addErrback(get_state_error)

            protocol.send_get_state()

        d.addCallback(connection_ok)

        # in case of connection error show in debug and try again
        def connection_error(reason):
            # in case of error remove the peer from the database
            addr = random_peer['host'] + ':' + str(random_peer['port'])
            self.logger.debug('Error while connecting to {0}: {1}'.format(
                addr, reason.getErrorMessage()))
            Prisma().db.delete_peer(random_peer['_id'])
            # then call later again
            self.callLater(0, lambda: self.download_state_from_random_peer())

        d.addErrback(connection_error)

    def get_peers_from_random_peer(self):
        """
        Gets a random peer from the database, connects to it, and asks for peers.
        """
        # get a random peer from database
        random_peer = Prisma().db.get_random_peer()

        # if list is empty
        if not random_peer:
            self.logger.info(
                'No peers to connect to. Wait for a peer to connect to you or restart with a known peer.'
            )
            return

        random_peer = random_peer.pop()
        host = random_peer['host']
        port = random_peer['port']
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_peers and call send_get_peers
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_peers_ok(_):
                self.logger.info('Successfully got peers from {0}:{1}'.format(
                    host, port))

            protocol.d.addCallback(get_peers_ok)

            def get_peers_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting peers from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
                self.get_peers_lc.reset()
                self.get_peers_from_random_peer()

            protocol.d.addErrback(get_peers_error)

            protocol.send_get_peers()

        d.addCallback(connection_ok)

        # in case of connection error show in debug and try again
        def connection_error(reason):
            # in case of error remove the peer from the database
            addr = random_peer['host'] + ':' + str(random_peer['port'])
            self.logger.debug('Error while connecting to {0}: {1}'.format(
                addr, reason.getErrorMessage()))
            Prisma().db.delete_peer(random_peer['_id'])
            # then restart the timer and retry get_peers_from_random_peer
            self.get_peers_lc.reset()
            self.get_peers_from_random_peer()

        d.addErrback(connection_error)

    def get_events_from_random_peer(self):
        """
        Gets a random peer from the database, connects to it, and asks for events.
        """
        # check first if ready
        if self.status != STATUS_READY:
            self.logger.info('Not ready, still bootstrapping.')
            return

        # check that we have enough peers
        peer_count = Prisma().db.count_peers()

        if peer_count < 3:
            if not CONFIG.getboolean('developer',
                                     'developer_mode') or peer_count < 1:
                self.logger.debug(
                    'Not enough peers found in the network, skipping...')
                return

        random_peer = Prisma().db.get_random_peer().pop()
        host = random_peer['host']
        port = random_peer['port']
        client = TCP4ClientEndpoint(self.reactor, host, port, self.timeout)
        d = client.connect(self.factory)

        # in case of connection ok, add the callbacks for get_events and call send_get_events
        def connection_ok(protocol):
            protocol.d = defer.Deferred()

            def get_events_ok(_):
                self.logger.info('Successfully got events from {0}:{1}'.format(
                    host, port))

            protocol.d.addCallback(get_events_ok)

            def get_events_error(reason):
                error_message = reason.getErrorMessage()
                self.logger.error(
                    'Error when getting events from {0}:{1}: {2}'.format(
                        host, port, error_message))
                protocol.close_connection()
                self.get_events_lc.reset()
                self.get_events_from_random_peer()

            protocol.d.addErrback(get_events_error)

            protocol.send_get_events()

        d.addCallback(connection_ok)

        # in case of connection error show in debug and try again
        def connection_error(reason):
            # in case of error remove the peer from the database
            addr = random_peer['host'] + ':' + str(random_peer['port'])
            self.logger.debug('Error while connecting to {0}: {1}'.format(
                addr, reason.getErrorMessage()))
            Prisma().db.delete_peer(random_peer['_id'])
            # then restart the timer and retry get_events_from_random_peer
            self.get_events_lc.reset()
            self.get_events_from_random_peer()

        d.addErrback(connection_error)

    @staticmethod
    def get_node_id():
        """
        Generate a node id based on public key.

        :return: string
        """
        crypto = Crypto()
        pub_key_sha = crypto.sha256(str(Prisma().graph.keystore['publicKey']))
        node_id = pub_key_sha[:10]
        return hexlify(node_id).decode('utf-8')
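

# Hedged usage sketch (not part of the original example): running the
# NetworkService under the Twisted reactor.  It assumes Prisma(), CONFIG and
# the keystore have already been initialized by the application.
def _example_run_network_service():
    from twisted.internet import reactor

    service = NetworkService()
    service.start()
    # Tear down connections and looping calls when the reactor shuts down.
    reactor.addSystemEventTrigger('before', 'shutdown', service.stop)
    reactor.run()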
Beispiel #4
0
class Coordinator(object):
    """
    Private group coordinator implementation.

    You almost certainly want to use :class:`afkak.ConsumerGroup` instead.

    :param str group_id:
        Name of the consumer group to join for dynamic partition assignment,
        and to use for fetching and committing offsets.

    :param topics:
        Names of topics to consume. At least one topic must be given.
    :type topics: List[str]

    :param float session_timeout_ms:
        The timeout used to detect failures when using Kafka's group
        management facilities.
        Default: 30000

    :param float heartbeat_interval_ms:
        The expected time in milliseconds
        between heartbeats to the consumer coordinator when using
        Kafka's group management feature. Heartbeats are used to ensure
        that the consumer's session stays active and to facilitate
        rebalancing when new consumers join or leave the group. The
        value must be set lower than session_timeout_ms, but typically
        should be set no higher than 1/3 of that value. It can be
        adjusted even lower to control the expected time for normal
        rebalances.
        Default: 5000

    :param float initial_backoff_ms:
        Milliseconds to backoff between attempts to find the group
        coordinator broker at startup.
        Default: 1000

    :param float retry_backoff_ms:
        Milliseconds to backoff when retrying from
        expected errors and group membership changes.
        Default: 100.

    :param float fatal_backoff_ms:
        Milliseconds to backoff when retrying on
        unexpected kafka errors
        Default: 10000.
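
    A hedged construction sketch (``client`` is assumed to be a connected
    :class:`afkak.KafkaClient`; group and topic names are illustrative)::

        coordinator = Coordinator(client, 'my-group', topics=['my-topic'])
        d = coordinator.start()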
    """
    def __init__(self,
                 client,
                 group_id,
                 topics,
                 session_timeout_ms=30000,
                 heartbeat_interval_ms=5000,
                 initial_backoff_ms=1000,
                 retry_backoff_ms=100,
                 fatal_backoff_ms=10000):
        self.client = client
        self.group_id = group_id
        self.topics = topics
        if not topics:
            raise ValueError('topics must be non-empty')
        self.session_timeout_ms = session_timeout_ms
        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.initial_backoff_ms = initial_backoff_ms
        self.retry_backoff_ms = retry_backoff_ms
        self.fatal_backoff_ms = fatal_backoff_ms

        self.member_id = ""
        self.leader_id = None
        self.generation_id = None
        self.coordinator_broker = None  # BrokerMetadata of the coordinator
        self.protocol = _ConsumerProtocol()

        self._start_d = None
        self._state = '[initialized]'  # Keep track of state for debugging
        # do we need to rejoin?
        self._rejoin_needed = True
        # are we shutting down?
        self._stopping = False
        # delayedcall for a pending rejoin
        self._rejoin_wait_dc = None
        # deferred for a rejoin in progress
        self._rejoin_d = None

        # loopingcall for the heartbeat timer
        self._heartbeat_looper = LoopingCall(self._heartbeat)
        self._heartbeat_looper.clock = self.client.reactor
        self._heartbeat_looper_d = None
        self._heartbeat_request_d = None

    def __repr__(self):
        return '<{}.{} group={} member={} topics={} {}>'.format(
            __name__,
            self.__class__.__name__,
            self.group_id,
            self.member_id,
            self.topics,
            self._state,
        )

    def get_coordinator_broker(self):
        def _get_coordinator_failed(result):
            retry_delay = self.initial_backoff_ms

            if result.check(CoordinatorNotAvailable,
                            NotCoordinatorForConsumerError,
                            RequestTimedOutError):
                # while kafka is starting up, it may take some time before it
                # successfully chooses an authoritative broker for the group
                log.debug("%s could not get coordinator broker: %s", self,
                          result.value)
                if result.check(RequestTimedOutError):
                    retry_delay = self.fatal_backoff_ms

            elif result.check(KafkaError):
                log.warning("%s could not get coordinator broker: %s", self,
                            result.value)
                retry_delay = self.fatal_backoff_ms
            else:
                return result

            self.client.reactor.callLater(
                retry_delay / 1000.0,
                self.join_and_sync,
            )
            return

        def _get_coordinator_success(leader):
            if not leader:
                self.client.reactor.callLater(
                    self.initial_backoff_ms / 1000.0,
                    self.join_and_sync,
                )
                return
            # do a topic metadata load, because if we are elected leader
            # we will need to know the partition info for all our topics
            # to assign them correctly
            metadata_d = self.client.load_metadata_for_topics(*self.topics)
            metadata_d.addCallback(lambda result: self)
            return metadata_d

        d = self.client._get_coordinator_for_group(self.group_id)
        d.addCallbacks(_get_coordinator_success, _get_coordinator_failed)
        return d

    def send_join_group_request(self):
        payload = _JoinGroupRequest(
            group=self.group_id,
            session_timeout=self.session_timeout_ms,
            member_id=self.member_id,
            protocol_type=self.protocol.protocol_type,
            group_protocols=self.protocol.join_group_protocols(self.topics),
        )

        def _join_group_success(response):
            self.member_id = response.member_id
            self.generation_id = response.generation_id
            self.leader_id = response.leader_id
            return response

        de = self.client._send_request_to_coordinator(
            self.group_id,
            payload,
            encoder_fn=KafkaCodec.encode_join_group_request,
            decode_fn=KafkaCodec.decode_join_group_response,
            # join_group requests can take up to 30s as the group restabilizes
            # override client.timeout to allow for that plus some extra
            min_timeout=35.0,
        )
        de.addCallbacks(
            _join_group_success,
            self.rejoin_after_error,
            errbackKeywords=dict(label="join_group"),
        )
        return de

    def send_sync_group_request(self, group_assignment):
        # Make the sync group request
        payload = _SyncGroupRequest(
            group=self.group_id,
            generation_id=self.generation_id,
            member_id=self.member_id,
            group_assignment=group_assignment,
        )
        de = self.client._send_request_to_coordinator(
            group=self.group_id,
            payload=payload,
            encoder_fn=KafkaCodec.encode_sync_group_request,
            decode_fn=KafkaCodec.decode_sync_group_response,
        )
        de.addErrback(self.rejoin_after_error, label="sync_group")
        return de

    def send_leave_group_request(self):
        # Make the leave group request
        payload = _LeaveGroupRequest(
            group=self.group_id,
            member_id=self.member_id,
        )

        def _leave_group_success(result):
            log.debug("leave_group request succeeded")
            self.member_id = ""
            self.generation_id = None

        de = self.client._send_request_to_coordinator(
            group=self.group_id,
            payload=payload,
            encoder_fn=KafkaCodec.encode_leave_group_request,
            decode_fn=KafkaCodec.decode_leave_group_response,
        )
        de.addCallback(_leave_group_success)
        return de

    def send_heartbeat_request(self):
        # Make the heartbeat request
        payload = _HeartbeatRequest(
            group=self.group_id,
            generation_id=self.generation_id,
            member_id=self.member_id,
        )
        request_d = self.client._send_request_to_coordinator(
            group=self.group_id,
            payload=payload,
            encoder_fn=KafkaCodec.encode_heartbeat_request,
            decode_fn=KafkaCodec.decode_heartbeat_response,
        )
        return request_d

    def start(self):
        if self._start_d:
            raise RestartError("Start called on already-started coordinator")

        log.debug("starting")
        self._start_d = Deferred()
        self.join_and_sync()
        return self._start_d

    def reset_heartbeat_timer(self):
        if self._heartbeat_looper.running:
            self._heartbeat_looper.reset()
        else:
            self._heartbeat_looper_d = self._heartbeat_looper.start(
                self.heartbeat_interval_ms / 1000.0,
                now=False,
            )
            self._heartbeat_looper_d.addErrback(self._heartbeat_timer_failed)
            self._heartbeat_looper_d.addBoth(self._heartbeat_timer_stopped)

    @inlineCallbacks
    def stop(self, errback_result=None):
        if self._start_d is None:
            raise RestopError("Shutdown called on non-running coordinator")

        if self._stopping:
            raise RestopError("Shutdown called more than once.")

        log.info("%s stopping with %s", self, errback_result)
        self._state = '[stopping]'
        self._stopping = True
        self._rejoin_needed = False
        if self._rejoin_wait_dc:
            self._rejoin_wait_dc.cancel()

        if self._heartbeat_request_d:
            self._heartbeat_request_d.cancel()

        if self._heartbeat_looper:
            if self._heartbeat_looper.running:
                self._heartbeat_looper.stop()

        self._state = '[leaving]'
        if self.coordinator_broker is not None and self.member_id:
            try:
                yield self.send_leave_group_request()
            except Exception:
                log.exception("error sending leave group request")

        if self._rejoin_d:
            self._rejoin_d, d = None, self._rejoin_d
            d.cancel()

        self._state = '[stopped]'
        self.protocol = None
        self.member_id = ""
        self.generation_id = None
        self.coordinator_broker = None

        self._start_d, d = None, self._start_d
        if not d.called:
            if errback_result:
                d.errback(errback_result)
            else:
                d.callback(self)

    def _heartbeat(self):
        if self._stopping:
            return
        if self._rejoin_needed:
            log.debug("%s: skipping heartbeat, rejoin needed", self)
            return
        if self._heartbeat_request_d:
            log.debug("%s: skipping heartbeat, in progress", self)
            return
        log.debug("%s: heartbeat", self)
        self._heartbeat_request_d = self.send_heartbeat_request()
        self._heartbeat_request_d.addCallbacks(
            self._handle_heartbeat_success,
            self._handle_heartbeat_failure,
        )

    def _handle_heartbeat_success(self, result):
        self._heartbeat_request_d = None
        log.debug("%s: heartbeat success", self)
        return result

    def _handle_heartbeat_failure(self, failure):
        self._heartbeat_request_d = None
        self._heartbeat_looper.stop()
        return self.rejoin_after_error(failure, label="heartbeat")

    def _heartbeat_timer_failed(self, failure):
        """
        Called when the heartbeat LoopingCall terminates with an unhandled
        exception. This represents a bug.
        """
        log.error("%s Unhandled exception in heartbeat loop",
                  self,
                  exc_info=(failure.type, failure.value,
                            failure.getTracebackObject()))
        # TODO: This should handle the failure by leaving the group.
        return failure

    def _heartbeat_timer_stopped(self, result):
        self._heartbeat_looper_d = None
        return result

    def rejoin_after_error(self, result, label="rejoin_after_error"):
        rejoin_delay = self.retry_backoff_ms

        if result.check(RebalanceInProgress):
            log.debug("%s %s: group rebalance needed, rejoining", self, label)

        elif result.check(CoordinatorNotAvailable,
                          NotCoordinatorForConsumerError):
            log.info("%s %s: group coordinator is invalid, rejoining", self,
                     label)
            self.client.reset_consumer_group_metadata(self.group_id)

        elif result.check(IllegalGeneration):
            # we have been rejected - our consumers should not remain open
            log.info("%s %s: generation id %s is not current, rejoining", self,
                     label, self.generation_id)
            self.on_group_leave()

        elif result.check(InvalidGroupId, UnknownMemberId):
            # we have been rejected - our consumers should not remain open
            log.info("%s %s: member id is not valid, rejoining", self, label)
            self.on_group_leave()
            self.member_id = ""

        elif result.check(InconsistentGroupProtocol):
            # this error can only happen if there's already a consumer group on
            # the topic and it doesn't support the protocols we've declared.
            # retrying is unlikely to help, so wait longer between attempts
            log.error("%s %s: group protocol was rejected, delaying rejoin",
                      self, label)
            rejoin_delay = self.fatal_backoff_ms

        elif result.check(RequestTimedOutError):
            # If the request timed out, use the long delay before trying again
            log.warning("%s %s: request timed out, delaying rejoin", self, label)
            self.on_group_leave()
            self.client.reset_consumer_group_metadata(self.group_id)
            rejoin_delay = self.fatal_backoff_ms

        elif self._stopping and result.check(CancelledError):
            # Not really an error
            return

        elif result.check(KafkaError):
            # no other kafka errors are expected from the requests we're
            # sending out, so to get here means something is wrong.
            # retrying probably won't help, so wait longer between attempts
            log.error("%s %s: unexpected response error: %r, delaying rejoin",
                      self, label, result)
            rejoin_delay = self.fatal_backoff_ms

        else:
            # if it wasn't a kafka error, we're not going to try to rejoin
            log.error("%s %s: unknown error %s", self, label, result)
            self.on_group_leave()
            self.stop(errback_result=result)
            return

        self._state = '[rejoin_needed]'
        self._rejoin_needed = True
        if not self._rejoin_wait_dc:
            rejoin_delay_s = rejoin_delay / 1000.0
            log.debug("%s %s: scheduling rejoin in %.1fs", self, label,
                      rejoin_delay_s)
            self._rejoin_wait_dc = self.client.reactor.callLater(
                rejoin_delay_s, self.join_and_sync)

    def join_and_sync(self):
        """
            Called to join (or rejoin) the consumer group.
            Failures will result in join_and_sync being rescheduled
            to run again after a short delay, until it succeeds.

            Failed heartbeats will also reschedule join_and_sync
        """
        if self._rejoin_wait_dc:
            self._rejoin_wait_dc = None

        if not self._rejoin_needed:
            log.debug("join_and_sync: rejoin not needed")
            return

        # prevent multiple concurrent request situations
        if self._rejoin_d:
            # XXX: This should throw, not silently ignore.
            log.debug("join_and_sync: rejoin in progress")
            return

        def cleanup_rejoin_d(result):
            self._rejoin_d = None
            return result

        def rejoin_d_errback(result):
            log.error("%s error during join_and_sync: %s", self, result)

        self._rejoin_d = d = self._join_and_sync()
        d.addBoth(cleanup_rejoin_d).addErrback(rejoin_d_errback)
        return d

    @inlineCallbacks
    def _join_and_sync(self):
        self._state = '[fetching_broker]'
        coordinator_broker = yield self.get_coordinator_broker()
        if not coordinator_broker or self._stopping:
            return
        self.coordinator_broker = coordinator_broker

        self._state = '[joining]'
        yield self.on_join_prepare()
        join_response = yield self.send_join_group_request()
        if not join_response or self._stopping:
            # join failed, we'll be called again after a small delay
            return

        log.debug("%s joined, leader=%s", self,
                  join_response.leader_id == join_response.member_id)
        assignments = []
        if join_response.leader_id == join_response.member_id:
            try:
                assignments = yield self.protocol.generate_assignments(
                    join_response.members,
                    topic_partitions={},
                )
            except _NeedTopicPartitions as e:
                topic_partitions = yield self.client._load_topic_partitions(
                    *e.topics)
                assignments = yield self.protocol.generate_assignments(
                    join_response.members,
                    topic_partitions=topic_partitions,
                )

        self._state = '[syncing]'
        sync_response = yield self.send_sync_group_request(assignments)
        if not sync_response or self._stopping:
            # sync failed, we'll be called again after a small delay
            return

        # sync success - update our assignments
        # and restart the heartbeat timer
        assignment = self.protocol.decode_assignment(
            sync_response.member_assignment)
        self.reset_heartbeat_timer()
        self._rejoin_needed = False
        self._state = '[joined]'

        yield self.on_join_complete(assignment)

    def on_group_leave(self):
        """
            Called when the coordinator has been removed from the group
            (such as from a heartbeat error) and will not be rejoining
        """
        log.info("%s: on_group_leave", self)

    def on_join_prepare(self):
        """
            Called before joining or rejoining the group
        """
        log.info("%s: on_join_prepare", self)

    def on_join_complete(self, assignments):
        """
            Called after a successful group sync
        """
        log.info("%s: on_join_complete assignments=%r", self, assignments)
Example #5
class TelldusIn(Protocol):
    ''' Class for incoming Telldus events '''

    noisy = False
    path = '/tmp/TelldusEvents'
    idleTimeout = 60


    def __init__(self, parent):
        self.log = Logger(namespace=parent.name+'/in')
        self.parent = parent
        self.master = parent.master
        self.status = ColorState(log=self.log, state_format={0:0}) # <-- a hack to avoid color
        self.status.add_callback(self.parent.update_status)
        self.connected = False
        self.factory = TelldusInFactory(self, self.parent)


    def connect(self):
        self.status.set_OFF()
        self.master.reactor.connectUNIX(self.path, self.factory)


    def connectionMade(self):
        self.log.info("Connected to {p}", p=self.path)
        self.connected = True
        self.status.set_YELLOW('IN connection made, waiting for data')
        self.data = ''
        self.elements = []
        self.timer = LoopingCall(self.dataTimeout)
        self.timer.start(self.idleTimeout, now=False)


    def connectionLost(self, reason):  # pylint: disable=W0222
        self.connected = False
        self.log.info("Lost connection with {p}: {e}", p=self.path, e=reason.getErrorMessage())
        self.status.set_OFF('IN connection closed')
        if self.timer.running:
            self.timer.stop()


    def disconnect(self):
        if self.connected:
            self.transport.loseConnection()


    def dataReceived(self, data):
        self.log.debug('{_rawin}', rawin=data)

        if self.timer.running:
            self.timer.reset()
        else:
            self.timer.start(self.idleTimeout, now=False)

        data = self.data + data

        # Interpret the data
        (elements, data) = parsestream(data)
        (events, elements) = parseelements(elements, log=self.log)

        # Save remaining data for next call (incomplete frame received)
        self.data = data
        self.elements = elements

        # At this point, we can consider the connection up
        self.status.set_GREEN()

        # Iterate over the received events
        for event in events:
            self.log.debug('{_datain}', datain=event)
            self.parent.parse_event(event)


    def dataTimeout(self):
        self.timer.stop()
        self.log.info("No telldus activity. No connection?")
        self.status.set_YELLOW('No activity')
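
The detail worth keeping from this example is the idle timer: a LoopingCall started with now=False that is reset() every time data arrives, so its callback only fires after a full interval of silence. Below is a stripped-down sketch of that pattern; IdleWatchdog, idle_timeout, and on_idle are illustrative names, not part of the Telldus code.

from twisted.internet.protocol import Protocol
from twisted.internet.task import LoopingCall


class IdleWatchdog(Protocol):
    """Sketch: report when no data has arrived for idle_timeout seconds."""

    idle_timeout = 60  # seconds of silence before on_idle() fires

    def connectionMade(self):
        self.timer = LoopingCall(self.on_idle)
        self.timer.start(self.idle_timeout, now=False)

    def dataReceived(self, data):
        # Each received chunk pushes the idle deadline back by a full interval.
        if self.timer.running:
            self.timer.reset()
        else:
            self.timer.start(self.idle_timeout, now=False)

    def connectionLost(self, reason):
        if self.timer.running:
            self.timer.stop()

    def on_idle(self):
        # Stop the loop so it does not keep firing while the link stays quiet.
        self.timer.stop()
        print("no activity for %s seconds" % self.idle_timeout)
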
Example #6
class IRCProtocol(irc.IRC):
    UNREGISTERED_COMMANDS = ["PASS", "USER", "NICK", "PING", "PONG", "QUIT", "CAP"]

    def __init__(self, *args, **kwargs):
        self.dead = False
        self.type = None
        self.password = None
        self.nick = None
        self.user = None
        self.secure = False
        self.data = 0
        self.data_checker = LoopingCall(self.checkData)
        self.pinger = LoopingCall(self.ping)
        self.last_message = None

    def connectionMade(self):
        self.secure = ISSLTransport(self.transport, None) is not None
        self.data_checker.start(5)
        self.last_message = now()
        self.pinger.start(self.factory.client_ping_interval)
        ip = self.transport.getPeer().host
        expired = []
        for mask, linedata in self.factory.xlines["Z"].iteritems():
            if linedata["duration"] != 0 and epoch(now()) > epoch(linedata["created"]) + linedata["duration"]:
                expired.append(mask)
                continue
            if fnmatch.fnmatch(ip, mask):
                self.sendMessage(
                    "NOTICE", "*", ":{}".format(self.factory.client_ban_msg), prefix=self.factory.server_name
                )
                self.sendMessage("ERROR", ":Closing Link {} [Z:Lined: {}]".format(ip, linedata["reason"]))
                self.transport.loseConnection()
        for mask in expired:
            del self.factory.xlines["Z"][mask]
        if expired:
            self.factory.save_options()

    def dataReceived(self, data):
        if self.dead:
            return
        # self.factory.stats_data["bytes_in"] += len(data)
        # self.factory.stats_data["total_bytes_in"] += len(data)
        self.data += len(data)
        self.last_message = now()
        if self.pinger.running:
            self.pinger.reset()
        irc.IRC.dataReceived(self, data)

    def checkData(self):
        if self.type:
            self.type.checkData(self.data)
        self.data = 0

    def ping(self):
        if (now() - self.last_message).total_seconds() > self.factory.client_timeout_delay:
            self.transport.loseConnection()
            self.connectionLost(None)
        else:
            self.sendMessage("PING", ":{}".format(self.factory.server_name))

    def handleCommand(self, command, prefix, params):
        # self.factory.stats_data["lines_in"] += 1
        # self.factory.stats_data["total_lines_in"] += 1
        log.msg("handleCommand: {!r} {!r} {!r}".format(command, prefix, params))
        if not self.type and command not in self.UNREGISTERED_COMMANDS:
            return self.sendMessage(
                irc.ERR_NOTREGISTERED, command, ":You have not registered", prefix=self.factory.server_name
            )
        elif not self.type:
            return irc.IRC.handleCommand(self, command, prefix, params)
        else:
            return self.type.handleCommand(command, prefix, params)

    def sendLine(self, line):
        if self.dead:
            return
        # self.factory.stats_data["lines_out"] += 1
        # self.factory.stats_data["total_lines_out"] += 1
        # self.factory.stats_data["bytes_out"] += len(line)+2
        # self.factory.stats_data["total_bytes_out"] += len(line)+2
        log.msg("sendLine: {!r}".format(line))
        return irc.IRC.sendLine(self, line)

    def irc_PASS(self, prefix, params):
        if not params:
            return self.sendMessage(
                irc.ERR_NEEDMOREPARAMS, "PASS", ":Not enough parameters", prefix=self.factory.server_name
            )
        self.password = params[0]

    def irc_NICK(self, prefix, params):
        if not params:
            self.sendMessage(irc.ERR_NONICKNAMEGIVEN, ":No nickname given", prefix=self.factory.server_name)
        elif not VALID_USERNAME.match(params[0]):
            self.sendMessage(
                irc.ERR_ERRONEUSNICKNAME, params[0], ":Erroneous nickname", prefix=self.factory.server_name
            )
        elif params[0] in self.factory.users:
            self.sendMessage(
                irc.ERR_NICKNAMEINUSE,
                self.factory.users[params[0]].nickname,
                ":Nickname is already in use",
                prefix=self.factory.server_name,
            )
        else:
            lower_nick = irc_lower(params[0])
            expired = []
            for mask, linedata in self.factory.xlines["Q"].iteritems():
                if linedata["duration"] != 0 and epoch(now()) > epoch(linedata["created"]) + linedata["duration"]:
                    expired.append(mask)
                    continue
                if fnmatch.fnmatch(lower_nick, mask):
                    self.sendMessage(
                        irc.ERR_ERRONEUSNICKNAME,
                        self.nick if self.nick else "*",
                        params[0],
                        ":Invalid nickname: {}".format(linedata["reason"]),
                        prefix=self.factory.server_name,
                    )
                    return
            for mask in expired:
                del self.factory.xlines["Q"][mask]
            if expired:
                self.factory.save_options()
            self.nick = params[0]
            if self.user:
                try:
                    self.type = self.factory.types["user"](self, self.user, self.password, self.nick)
                except ValueError:
                    self.type = None
                    self.transport.loseConnection()

    def irc_USER(self, prefix, params):
        if len(params) < 4:
            return self.sendMessage(
                irc.ERR_NEEDMOREPARAMS, "USER", ":Not enough parameters", prefix=self.factory.server_name
            )
        self.user = params
        if self.nick:
            try:
                self.type = self.factory.types["user"](self, self.user, self.password, self.nick)
            except ValueError:
                self.type = None
                self.transport.loseConnection()

    def irc_SERVICE(self, prefix, params):
        try:
            self.type = self.factory.types["service"](self, params, self.password)
        except ValueError:
            self.type = None
            self.transport.loseConnection()

    def irc_SERVER(self, prefix, params):
        try:
            self.type = self.factory.types["server"](self, params, self.password)
        except ValueError:
            self.type = None
            self.transport.loseConnection()

    def irc_PING(self, prefix, params):
        if params:
            self.sendMessage("PONG", self.factory.server_name, ":{}".format(params[0]), prefix=self.factory.server_name)
        else:  # TODO: There's no nickname here, what do?
            self.sendMessage(irc.ERR_NOORIGIN, "CHANGE_THIS", ":No origin specified", prefix=self.factory.server_name)

    def irc_PONG(self, prefix, params):
        pass

    def irc_CAP(self, prefix, params):
        pass

    def irc_QUIT(self, prefix, params):
        self.transport.loseConnection()

    def connectionLost(self, reason):
        if self.dead:
            return
        self.dead = True
        self.factory.unregisterProtocol(self)
        if self.type:
            self.type.connectionLost(reason)
        if self.data_checker.running:
            self.data_checker.stop()
        if self.pinger.running:
            self.pinger.stop()
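
This IRC server protocol pairs two LoopingCalls: one that samples throughput every few seconds and one that pings the client and drops the connection once the peer has been silent longer than a timeout. A self-contained sketch of the ping/timeout half is below; PingingProtocol and its ping_interval/timeout values are assumptions for illustration, not part of the original server.

from datetime import datetime

from twisted.internet.protocol import Protocol
from twisted.internet.task import LoopingCall


class PingingProtocol(Protocol):
    """Sketch: periodic PING with a dead-peer timeout."""

    ping_interval = 30   # seconds between PINGs
    timeout = 90         # seconds of silence before disconnecting

    def connectionMade(self):
        self.last_message = datetime.utcnow()
        self.pinger = LoopingCall(self._ping)
        self.pinger.start(self.ping_interval, now=False)

    def dataReceived(self, data):
        self.last_message = datetime.utcnow()
        if self.pinger.running:
            # Activity pushes the next PING back a full interval.
            self.pinger.reset()

    def _ping(self):
        idle = (datetime.utcnow() - self.last_message).total_seconds()
        if idle > self.timeout:
            self.transport.loseConnection()
        else:
            self.transport.write(b"PING :keepalive\r\n")

    def connectionLost(self, reason):
        if self.pinger.running:
            self.pinger.stop()
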
Example #7
class Startup(YomboLibrary):
    """
    Start-up checks

    Checks to make sure basic configurations are valid and other start-up operations.
    """
    def __str__(self):
        """
        Returns the name of the library.
        :return: Name of the library
        :rtype: string
        """
        return "Yombo startup library"

    def _init_(self, **kwargs):
        first_run = self._Configs.get('core', 'first_run', False, False)
        if self._Loader.operating_mode == 'first_run':  # will know if first_run already or yombo.ini is missing.
            return
        if first_run is True:
            self._Loader.operating_mode = 'first_run'
            return True
        items_needed = []

        gwid = self._Configs.get('core', 'gwid', 'local', False)
        if gwid is None or gwid == "":
            items_needed.append("Gateway ID")
        gwuuid = self._Configs.get("core", "gwuuid", None)
        if gwuuid is None or gwuuid == "":
            items_needed.append("Gateway private UUID")
        hash = self._Configs.get("core", "gwhash", None)
        if hash is None or hash == "":
            items_needed.append("Gateway login password hash")
        is_master = self._Configs.get("core", "is_master", True)
        if is_master is False:
            master_gateway = self._Configs.get("core", "master_gateway", None,
                                               False)
            if master_gateway is None or master_gateway == "":
                items_needed.append(
                    "Gateway is marked as slave, but not master gateway set.")
        else:
            master_gateway = self._Configs.get("core", "master_gateway", None,
                                               False)

        if len(items_needed) > 0:
            needed_text = '</li><li>'.join(items_needed)
            print("start needetext: %s" % needed_text)
            self._Notifications.add({
                'title':
                'Need configurations',
                'message':
                'System has been placed into configuration mode. Reason: The following configurations are needed:<p><ul><li>%s</li></ul>'
                % needed_text,
                'source':
                'Yombo Startup Library',
                'persist':
                False,
                'priority':
                'high',
                'always_show':
                True,
                'always_show_allow_clear':
                True,
            })
            self._Loader.operating_mode = 'config'
        else:
            self._Loader.operating_mode = 'run'

        self.cache_updater_running = False
        self.system_stopping = False

    def _start_(self, **kwargs):
        """
        Handles making sure caches are regularly updated and cleaned.
        :return:
        """
        self.update_caches_loop = LoopingCall(self.update_caches)
        self.update_caches_loop.start(60 * 60 * 6, False)

    def _stop_(self, **kwargs):
        self.system_stopping = True

    @inlineCallbacks
    def update_caches(self, force=None, quick=False):
        """
        Iterates through all modules and devices to update their caches.
        :return:
        """
        if self.cache_updater_running is True and force is True:
            self.cache_updater_running = None
            return

        if self._Loader.operating_mode != 'run' or self.cache_updater_running is not False or \
                self.system_stopping is True:
            return

        logger.info("Starting cache updates.")

        # Copied from Modules library, but with sleeps and checks.
        for module_id, module in self._Modules.modules.items():
            if self.cache_updater_running is None or self.system_stopping is True:
                self.cache_updater_running = False
                self.update_caches_loop.reset()
                return

            yield self._Modules.do_update_module_cache(module)
            yield sleep(1)

        for device_id, device in self._Devices.devices.items():
            if self.cache_updater_running is None or self.system_stopping is True:
                self.cache_updater_running = False
                self.update_caches_loop.reset()
                return

            yield device.device_variables()
            yield sleep(0.5)
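
The Startup library schedules its cache maintenance with LoopingCall(...).start(interval, now=False) and throttles the work inside the call with short asynchronous sleeps so a long refresh cannot starve the reactor. A minimal sketch of that shape follows; refresh_caches and the item names are hypothetical stand-ins for the real cache update.

from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks
from twisted.internet.task import LoopingCall, deferLater


@inlineCallbacks
def refresh_caches():
    """Sketch of a slow, periodic maintenance job."""
    for item in ("modules", "devices", "scenes"):
        # deferLater acts as a non-blocking sleep between work units.
        yield deferLater(reactor, 1.0, lambda: None)
        print("refreshed cache:", item)


# Run every 6 hours; now=False waits a full interval before the first run.
# Because refresh_caches returns a Deferred, LoopingCall will not start the
# next run until the previous one has finished.
cache_loop = LoopingCall(refresh_caches)
cache_loop.start(60 * 60 * 6, now=False)
# reactor.run() would drive the loop in a real program.
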
class C2SServerProtocol(InternalServerProtocol):
    pinger = None
    ping_txid = None

    def __init__(self, config):
        txprotobuf.Protocol.__init__(self, c2s)
        self.MAX_LENGTH = config['server']['c2s.pack_size_max']
        self.idler = LoopingCall(self.onIdle)
        if 'reset' not in dir(self.idler):
            # Older Twisted releases lack LoopingCall.reset(); emulate it here.
            def reset():
                try:
                    self.idler.stop()
                except:
                    pass
                try:
                    self.idler.start(IDLE_DELAY, False)
                except:
                    pass

            self.idler.reset = reset

    def connectionMade(self):
        InternalServerProtocol.connectionMade(self)
        """
        TODO this breaks compatibility with Android client < 16
        info = InternalServerProtocol.connectionMade(self)
        # send serverinfo
        r = c2s.ServerInfo()
        r.version = info['version']
        r.client_protocol = info['client-protocol']
        r.fingerprint = info['fingerprint']
        r.network = info['network']
        r.supports.extend(info['supports'])
        self.sendBox(r)
        """
        # start idler
        self.idler.start(IDLE_DELAY, False)

    def connectionLost(self, reason=connectionDone):
        try:
            # stop idler
            self.idler.stop()
        except:
            pass
        try:
            # stop pinger
            self.pinger.stop()
        except:
            pass
        InternalServerProtocol.connectionLost(self, reason)

    def onIdle(self):
        self.service.idle()
        # send ping
        a = c2s.Ping()
        a.timestamp = long(time.time())
        self.ping_txid = self.sendBox(a)
        # ping is sent, wait for response
        if not self.pinger:
            self.pinger = reactor.callLater(PING_DELAY, self.pingTimeout)

    def pingTimeout(self):
        try:
            self.idler.stop()
        except:
            pass
        self.service.ping_timeout()
        self.pinger = None

    def boxReceived(self, data, tx_id=None):
        # reset idler
        try:
            self.idler.reset()
        except:
            pass

        # optional reply
        r = None
        name = data.__class__.__name__

        if name == 'LoginRequest':
            r = c2s.LoginResponse()
            (r.status, userid) = self.service.login(str(tx_id), data.token,
                                                    data.client_protocol,
                                                    data.client_version,
                                                    data.flags)
            if userid:
                r.user_id = userid

        elif name == 'AuthenticateRequest':
            r = c2s.AuthenticateResponse()
            r.valid = self.service.authenticate(str(tx_id), data.token)

        elif name == 'MessagePostRequest':
            if self.service.is_logged():
                r = c2s.MessagePostResponse()
                if len(data.recipient) > 0:
                    res = self.service.post_message(str(tx_id),
                                                    tuple(data.recipient),
                                                    str(data.mime),
                                                    tuple(data.flags),
                                                    data.content)
                    for userid, msgid in res.iteritems():
                        me = r.entry.add()
                        me.user_id = userid
                        if type(msgid) == str:
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_SUCCESS
                            me.message_id = msgid
                        elif msgid:
                            me.status = msgid
                        else:
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_ERROR

        elif name == 'MessageAckRequest':
            if self.service.is_logged():
                r = c2s.MessageAckResponse()
                res = self.service.ack_message(str(tx_id),
                                               tuple(data.message_id))
                for _msgid in data.message_id:
                    msgid = str(_msgid)
                    e = r.entry.add()
                    e.message_id = msgid
                    try:
                        success = res[msgid]
                        if success:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_SUCCESS
                        else:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_NOTFOUND
                    except:
                        import traceback
                        traceback.print_exc()
                        e.status = c2s.MessageAckResponse.Entry.STATUS_ERROR

        elif name == 'RegistrationRequest':
            if not self.service.is_logged():
                r = c2s.RegistrationResponse()
                res = self.service.register_user(str(tx_id),
                                                 str(data.username))
                r.status = res['status']
                if 'token' in res:
                    r.token = res['token']
                if 'sms_from' in res:
                    r.sms_from = res['sms_from']
                if 'email_from' in res:
                    r.email_from = res['email_from']

        elif name == 'ValidationRequest':
            if not self.service.is_logged():
                r = c2s.ValidationResponse()
                res, token = self.service.validate_user(
                    str(tx_id), str(data.validation_code))
                r.status = res
                if token:
                    r.token = token

        elif name == 'UserLookupRequest':
            if self.service.is_logged():
                # lookup can delay operations by a long time, so defer it
                def lookup_complete(found, tx_id, return_value=False):
                    r = c2s.UserLookupResponse()
                    for u in found:
                        e = r.entry.add()
                        e.user_id = u['userid']
                        if 'status' in u:
                            e.status = u['status']
                        if 'timestamp' in u:
                            e.timestamp = u['timestamp']
                        if 'timediff' in u:
                            e.timediff = u['timediff']
                    if return_value:
                        return r
                    else:
                        self.sendBox(r, tx_id)

                # force generic users only
                found = self.service.lookup_users(
                    tx_id,
                    [str(x)[:utils.USERID_LENGTH] for x in data.user_id])
                if isinstance(found, defer.Deferred):
                    found.addCallback(lookup_complete, tx_id)
                else:
                    r = lookup_complete(found, tx_id, True)

        elif name == 'ServerInfoRequest':
            r = c2s.ServerInfoResponse()
            info = self.service.serverinfo(str(tx_id),
                                           str(data.client_version),
                                           data.client_protocol)
            r.version = info['version']
            r.client_protocol = info['client-protocol']
            r.server_protocol = info['server-protocol']
            r.fingerprint = info['fingerprint']
            r.supports.extend(info['supports'])

        elif name == 'UserInfoUpdateRequest':
            if self.service.is_logged():
                r = c2s.UserInfoUpdateResponse()
                flags = None
                if data.HasField('flags'):
                    flags = data.flags
                if data.HasField('status_message'):
                    status_msg = data.status_message.encode('utf-8')
                else:
                    status_msg = None
                if data.HasField('google_registration_id'):
                    google_regid = str(data.google_registration_id)
                else:
                    google_regid = None

                r.status = self.service.user_update(flags, status_msg,
                                                    google_regid)

        elif name == 'UserPresenceSubscribeRequest':
            if self.service.is_logged():
                r = c2s.UserPresenceSubscribeResponse()
                r.status = self.service.user_presence_subscribe(
                    str(data.user_id), data.events)

        elif name == 'Pong':
            # client has responded: check tx_id
            if self.ping_txid == tx_id:
                # reset ping timeout check
                try:
                    self.pinger.cancel()
                except:
                    pass
                self.pinger = None

        elif name == 'ValidationCodeRequest':
            if self.service.is_logged():
                code = self.service.revalidate()
                r = c2s.ValidationCodeResponse()
                if type(code) == str:
                    r.status = c2s.ValidationCodeResponse.STATUS_SUCCESS
                    r.code = code
                else:
                    r.status = c2s.ValidationCodeResponse.STATUS_ERROR

        if r:
            self.sendBox(r, tx_id)

        # reset idler (again)
        try:
            self.idler.reset()
        except:
            pass
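
One detail in the lookup branch above is that the service call may hand back either a plain value or a Deferred, forcing an isinstance check before wiring up callbacks. Twisted's defer.maybeDeferred normalises both cases (and raised exceptions) into a single Deferred. The sketch below shows that helper with a hypothetical lookup_users stand-in; it is not part of the original server code.

from twisted.internet import defer


def lookup_users(user_ids):
    """Stand-in service call: may return a list directly or a Deferred."""
    return defer.succeed([{"userid": uid} for uid in user_ids])


def handle_result(found):
    for entry in found:
        print("found user:", entry["userid"])


# maybeDeferred wraps plain return values, Deferreds, and exceptions into one
# Deferred, so a single callback chain handles every case.
d = defer.maybeDeferred(lookup_users, ["alice", "bob"])
d.addCallback(handle_result)
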
Example #9
class AmqpConfigHandler(YomboLibrary):
    """
    Handles interactions with Yombo servers through the AMQP library.
    """
    def __init__(self, amqpyombo):
        """
        Loads various variables and calls :py:meth:connect() when it's ready.

        :return:
        """
        self.parent = amqpyombo
        self.init_startup_count = 0
        self.inital_config_items_requested = 0
        self.init_defer = None  # Prevents loader from moving on until we are done.
        self.__doing_full_configs = False  # will be set to True later when download configurations
        self.__pending_updates = {
        }  # Holds a dict of configuration items we've asked for but haven't received a response to yet.
        self.__process_queue = {
        }  # Holds received configuration responses that are queued for processing.
        self.processing = False
        self.processing_queue = False
        self.db_existing_data = {
        }  # used to keep track of items to add, update, or delete
        self.db_completed_ids = {}  # [table][row_id]
        self.db_delete_ids = {}  # [table][row_id]
        self.db_insert_data = {}  # [table][row_id] = orderedDicts
        self.db_update_data = {}  # [table][row_id] = Dictionaries

        self._checkProcessQueueLoop = LoopingCall(self.process_config_queue)
        self.check_download_done_calllater = None

        self._LocalDB = self.parent._LocalDB

        self.amqp = None  # holds our pointer for our amqp connection.
        self._getAllConfigsLoggerLoop = None

    def connect_setup(self, init_defer):
        """
        The connection was setup, but not quite ready to ask for configs. Just setup a defer.

        :return:
        """
        self.init_defer = init_defer

    def connected(self):
        """
        Called when fully connected.

        :return:
        """
        self.get_system_configs()
        self.check_download_done_calllater = reactor.callLater(
            15, self.check_download_done)
        # return self.init_defer

    def disconnected(self):
        """
        Called by amqpyombo when the system is disconnected.

        :return:
        """
        if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running:
            self._getAllConfigsLoggerLoop.stop()
        if self._checkProcessQueueLoop is not None and self._checkProcessQueueLoop.running:
            self._checkProcessQueueLoop.stop()

        self.__pending_updates.clear()
        self.__doing_full_configs = False

        try:
            self.check_download_done_calllater.cancel()
        except:
            pass

    def _stop_(self):
        """
        Called by the Yombo system when it's time to shutdown. This in turn calls the disconnect.

        :return:
        """
        if self.init_defer.called is False:
            # reactor.callLater(0.1, self.init_defer.callback, 10) #
            self.init_defer.callback(
                1)  # if we don't check for this, we can't stop!

        if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running:
            self._getAllConfigsLoggerLoop.stop()
        if self._checkProcessQueueLoop is not None and self._checkProcessQueueLoop.running:
            self._checkProcessQueueLoop.stop()
        try:
            self.check_download_done_calllater.cancel()
        except:
            pass

    def check_download_done(self):
        """
        Called periodically to check whether configuration downloads have completed. If they haven't, it either keeps waiting, falls back to existing (older) configs, or stops the gateway.

        :return:
        """
        # print(f"check_download_done_calllater....{self.__process_queue}")
        if self.__doing_full_configs is True and len(
                self.__process_queue) == 0:
            last_complete = self.parent._Configs.get("amqpyombo",
                                                     "lastcomplete", None,
                                                     False)
            if last_complete == None:
                if self.init_startup_count > 5:
                    logger.error(
                        "Unable to reach or contact server. If problem persists, check your configs. "
                        "(Help link soon.)")
                    self.reconnect = False
                    reactor.stop()
                    return
                logger.warn(
                    "Try #{count}, haven't been able to download configurations."
                    " However, there are no existing configs. Will keep trying.",
                    count=self.init_startup_count)
            else:
                if last_complete < int(time() - 60 * 60 * 48):
                    logger.warn(
                        "Try #{count}, haven't been able to download configurations. "
                        "Will continue trying in background.",
                        count=self.init_startup_count)
                    logger.warn(
                        "Using old configuration information. If this persists, check your configs. "
                        "(Help link soon.)")
                    if self.init_defer.called is False:
                        self.init_defer.callback(
                            1)  # if we don't check for this, we can't stop!
                else:
                    logger.error(
                        "Unable to reach or contact server. Configurations too old to keep using. "
                        "If problem persists, check your configs. (Help link soon.)"
                    )
                    self.reconnect = False
                    reactor.stop()
                    return

            self.init_startup_count = self.init_startup_count + 1
            self.check_download_done_calllater = reactor.callLater(
                30, self.check_download_done)  #

    def amqp_incoming_response(self, headers, body, properties, **kwargs):
        """
        Process configuration information coming from Yombo Servers. After message is validated, the payload is
        delivered here.

        This is an intermediary method: it converts the message into usable chunks and delivers them to "add_update_delete".
        In the future, these may be delivered to the individual libraries for processing.

        :param msg: raw message from AMQP
        :param properties: msg properties, so additional information can be retrieved.
        :return:
        """
        # logger.info("properties: {properties}", properties=properties)
        # logger.info("headers: {headers}", headers=properties.headers)

        response_type = headers["response_type"]
        if response_type not in CONFIG_ITEMS:
            raise YomboWarning(
                f"Configuration item 'get_{response_type}' not configured.")
        self.__process_queue[random_string(length=10)] = {
            "msg": body,
            "headers": headers,
            "properties": properties,
        }

        full_response_type = "get_" + response_type
        if full_response_type not in self.__pending_updates:  # Ignore old request items.
            logger.warn(
                "Configuration item not requested, dropping: {response_type}",
                response_type=response_type)
            return
        self.__pending_updates[full_response_type]["status"] = "received"
        self.process_config_queue()

    @inlineCallbacks
    def process_config_queue(self):
        """
        We queue incoming responses for processing. This helps to prevent race conditions as well as
        killing sqlite3 database with a bunch of concurrent requests.

        This is called to feed the process_config function configuration items to process and is called whenever
        we receive a configuration response item as well as a looping call to ensure everything gets processed.
        :return:
        """
        if self.processing:
            return None

        if self.processing_queue == False:
            logger.debug("Starting process_config_queue.")
            self.processing_queue = True
            for key in list(self.__process_queue):
                queue = self.__process_queue[key]
                # print("processing queue item: %s" % queue)
                # if queue["headers"]["config_item"] == "get_gateway_cluster":
                # print("msg: %s" % queue["msg"])
                # print("self.__pending_updates: %s" % self.__pending_updates)
                self.__pending_updates[
                    f'get_{queue["headers"]["response_type"]}'][
                        "status"] = "processing"
                yield self.process_config(queue["msg"],
                                          queue["headers"]["response_type"],
                                          queue["headers"]["config_type"],
                                          True)
                del self.__process_queue[key]
            self.processing_queue = False

    @inlineCallbacks
    def process_config(self,
                       msg,
                       config_item,
                       config_type=None,
                       primary_config_item=None):
        """
        Process configuration information coming from Yombo Servers. After message is validated, the payload is
        delivered here.

        This is an intermediary method: it converts the message into usable chunks and delivers them to "add_update_delete".
        In the future, these may be delivered to the individual libraries for processing.

        :param msg: raw message from AMQP
        :param properties: msg properties, so additional information can be retrieved.
        :return:
        """
        self.processing = True
        # print("processing config.... %s" % config_item)
        # print("processing msg.... %s" % msg)
        if msg["code"] != 200:
            logger.warn(
                "Configuration error for '{type}' received an error ({code}): {error}",
                type=config_item,
                code=msg["code"],
                error=msg["message"])
            yield self._remove_full_download_dueue("get_" + config_item)
            self.processing = False
            return

        if config_item == "gateway_configs":
            payload = msg["data"]
            for section in payload:
                for key in section["Values"]:
                    self.parent._Configs.set(section["Section"], key["Key"],
                                             key["Value"])

        elif config_item == "gateway_dns_name":
            payload = msg["data"]
            self.parent._Configs.set("dns", "name", payload["dns_name"])
            self.parent._Configs.set("dns", "domain", payload["dns_domain"])
            self.parent._Configs.set("dns", "domain_id",
                                     payload["dns_domain_id"])
            self.parent._Configs.set("dns", "allow_change_at",
                                     payload["allow_change_at"])
            self.parent._Configs.set("dns", "fqdn", payload["fqdn"])

        elif config_item in CONFIG_ITEMS:
            config_data = CONFIG_ITEMS[config_item]
            if msg["data_type"] == "object":
                # print("field remap start object")
                data = self.field_remap(msg["data"], config_data)
                # print("field remap stop")
                #                logger.info("in amqpyombo:process_config ->> config_item: {config_item}", config_item=config_item)
                #                logger.info("amqpyombo::process_config - data: {data}", data=data)
                #                 print("add_update_delete start object")
                yield self.add_update_delete(msg, data, config_item,
                                             config_type, True)
                # print("fadd_update_delete stop object")
                # self._Devices.add_update_delete(new_data)
                self.process_config(data, config_item)
            else:
                # print("add_update_delete start list")
                processed_data = []
                for data in msg["data"]:
                    processed_data.append(self.field_remap(data, config_data))
#                    logger.info("in amqpyombo:process_config ->> config_item: {config_item}", config_item=config_item)
#                    logger.info("amqpyombo::process_config - data: {data}", data=data)
                yield self.add_update_delete(msg, processed_data, config_item,
                                             config_type, True)
                # print("add_update_delete stop list")
        else:
            logger.warn(
                "ConfigurationUpdate::process_config - '{config_item}' is not a valid configuration item. Skipping.",
                config_item=config_item)
        yield self._remove_full_download_dueue("get_" + config_item)
        self.processing = False

    def field_remap(self, data, config_data):
        new_data = {}
        table_meta = self._LocalDB.db_model[config_data["table"]]
        key = None
        value = None
        try:
            for key, value in data.items(
            ):  # we must re-map AMQP names to local names.  Removes ones without a DB column too.
                if key in config_data["map"]:
                    # Convert ints and floats.
                    if value is None:
                        pass
                    elif table_meta[config_data["map"]
                                    [key]]["type"] == "INTEGER":
                        value = int(value)
                    elif table_meta[config_data["map"][key]]["type"] == "REAL":
                        value = float(value)
                    elif table_meta[config_data["map"]
                                    [key]]["type"] == "BOOLEAN":
                        value = is_true_false(value)

                    if key == "energy_map":
                        if isinstance(value, dict):
                            try:
                                value = json.dumps(value)
                            except Exception as e:
                                value = '{"0.0":0,"1.0":0}'
                    new_data[config_data["map"][key]] = value

                else:
                    new_data[key] = value
            return new_data
        except Exception as e:
            print(f"error in field remap.  Last key: {key}")
            print(f"input value: {value}")
            print(f"field remap - config_data = {config_data}")
            print(f"field remap - data = {data}")
            print(f"tablemeta: {table_meta}")
            logger.error("--------==(Error: {e})==--------", e=e)
            logger.error(
                "--------------------------------------------------------")
            logger.error("{error}", error=sys.exc_info())
            logger.error(
                "---------------==(Traceback)==--------------------------")
            logger.error("{trace}", trace=traceback.print_exc(file=sys.stdout))
            logger.error(
                "--------------------------------------------------------")

    # still in progress function. Need to clean up DB calls.
    # todo: what about updates directly to the library? Just call: config_item = get_config_item("devices")
    @inlineCallbacks
    def add_update_delete(self,
                          msg,
                          data_items,
                          config_item,
                          config_type,
                          from_amqp_incoming=False):
        """
        Adds, updates, or delete various items based on the content of the data item being sent it. It will
        inspect the data and make a determination on what to do. It will also consider the values stored in the
        database for add vs update.

        :param data:
        :param config_item:
        :param from_amqp_incoming:
        :return:
        """
        # print("add_update_delete called...%s" % config_item)
        config_data = CONFIG_ITEMS[config_item]
        current_table = config_data["table"]
        required_db_keys = []

        table_cols = self._LocalDB.db_model[current_table]

        if current_table not in self.db_existing_data:
            # print("creating db tracking trackers for: %s" % current_table)
            self.db_existing_data[current_table] = {}
            self.db_insert_data[current_table] = []
            self.db_update_data[current_table] = []
            self.db_completed_ids[current_table] = []
            self.db_delete_ids[current_table] = []

            select_fields = ["id"]
            if "updated_at" in table_cols:
                select_fields.append("updated_at")
            if "created_at" in table_cols:
                select_fields.append("created_at")
            records = yield self._LocalDB.select(current_table,
                                                 ", ".join(select_fields))
            for record in records:
                self.db_existing_data[current_table][record["id"]] = record
            if config_item == "gateway_modules":
                records = yield self._LocalDB.select("module_installed", "id")
                self.db_existing_data["module_installed"] = {}
                for record in records:
                    self.db_existing_data["module_installed"][
                        record["id"]] = True

                records = yield self._LocalDB.select("module_commits", "id")
                self.db_existing_data["module_commits"] = {}
                for record in records:
                    self.db_existing_data["module_commits"][
                        record["id"]] = True
                self.db_insert_data["module_commits"] = []
                self.db_update_data["module_commits"] = []
                self.db_completed_ids["module_commits"] = []

                # print("gateway module data_items: %s" % data_items)

        # self.db_existing_data = {}  # used to keep track of items to add, update, or delete
        # self.db_delete_ids = {}  # [table][row_id]
        # self.db_insert_data = {}  # [table][row_id] = orderedDicts
        # self.db_update_data = {}  # [table][row_id] = Dictionaries

        for col, col_data in table_cols.items():
            if col_data["notnull"] == 1:
                required_db_keys.append(col)

        if isinstance(data_items, list) is False:
            data_items = (data_items, )
        to_save = deque()

        try:
            for data in data_items:
                filtered_data = {
                }  # dict of just keys that are allowed in the DB.
                for key, value in data.items():
                    if key in table_cols:
                        filtered_data[key] = data[key]

                if "status" in filtered_data:
                    if filtered_data[
                            "status"] == 2:  # delete any nested items...
                        if config_item == "gateway_modules":
                            # print("config_item: %s" % config_item)
                            # print("current_table: %s" % current_table)
                            # print("self.db_existing_data: %s" % self.db_existing_data)
                            # print("filtered data: %s" % filtered_data)
                            self.item_purged(config_item, filtered_data["id"])

                            if filtered_data["id"] in self.db_existing_data[
                                    "modules"]:
                                self.db_delete_ids["modules"].append(
                                    filtered_data["id"])
                                self.db_completed_ids["modules"].append(
                                    filtered_data["id"])
                            if filtered_data["id"] in self.db_existing_data[
                                    "module_installed"]:
                                self.db_delete_ids["module_installed"].append(
                                    filtered_data["id"])
                                self.db_completed_ids[
                                    "module_installed"].append(
                                        filtered_data["id"])
                            self._LocalDB.delete(
                                "module_commits",
                                where=["module_id = ?", filtered_data["id"]])
                            # print(f"gateway commits: {data['module_commits']}")

                        elif filtered_data["id"] in self.db_existing_data[
                                current_table]:
                            self.db_delete_ids[current_table].append(
                                filtered_data["id"])
                            self.db_completed_ids[current_table].append(
                                filtered_data["id"])
                        continue

                if config_item == "gateway_modules":
                    for commit in data['module_commits']:
                        if commit["id"] in self.db_existing_data[
                                "module_commits"]:
                            self.db_update_data["module_commits"].append(
                                commit)
                        else:
                            self.db_insert_data["module_commits"].append(
                                commit)
                        self.db_completed_ids["module_commits"].append(
                            filtered_data["id"])

                # check to make sure the record has the required columns.
                if dict_has_key(filtered_data, required_db_keys) is False:
                    missing = set(required_db_keys) - filtered_data.keys()
                    logger.warn(
                        "{config_item}: Cannot do anything. Must have these keys: {needs}  Missing: {missing}",
                        config_item=config_item,
                        needs=required_db_keys,
                        missing=missing)
                    self.db_completed_ids[current_table].append(
                        filtered_data["id"])
                    continue

                library = None
                if config_data["library"] is not None:
                    library = self.parent._Loader.loaded_libraries[
                        config_data["library"]]

                if config_type == "full":
                    records = []
                else:
                    records = yield self._LocalDB.get_dbitem_by_id(
                        config_data["dbclass"], filtered_data["id"])

                # handle any nested items here.
                if "device_types" in data:
                    if len(data["device_types"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["device_types"]
                        yield self.process_config(newMsg, "module_device_type")

                if "variable_groups" in data:
                    if len(data["variable_groups"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_groups"]
                        yield self.process_config(newMsg, "variable_groups")

                if "variable_fields" in data:
                    if len(data["variable_fields"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_fields"]
                        yield self.process_config(newMsg, "variable_fields")

                if "variable_data" in data:
                    if len(data["variable_data"]):
                        newMsg = msg.copy()
                        newMsg["data"] = data["variable_data"]
                        yield self.process_config(newMsg, "variable_data")

                # If a new record
                # print("if filtered_data["id"] (%s) not in self.db_existing_data[current_table] (%s) == 0:
                if filtered_data["id"] not in self.db_existing_data[
                        current_table]:
                    if "status" in filtered_data and filtered_data[
                            "status"] == 2:  # we don"t add deleted items...
                        if config_data["purgeable"]:
                            self.item_purged(config_item, filtered_data["id"])
                            continue

                    item_to_insert = OrderedDict()
                    for col, col_info in table_cols.items():
                        if col in filtered_data:
                            item_to_insert[col] = filtered_data[col]
                        else:
                            item_to_insert[col] = None

                    self.db_insert_data[current_table].append(item_to_insert)
                    self.db_completed_ids[current_table].append(
                        filtered_data["id"])
                    if "added" in config_data["functions"]:
                        klass = getattr(library,
                                        config_data["functions"]["updated_at"])
                        klass(
                            data, True
                        )  # True = the library doesn"t need to update the database

                # update records
                else:
                    # if config_item == "gateway_modules":
                    #     print("new data: %s" % filtered_data)
                    #     print("old data: %s" % self.db_existing_data[current_table][filtered_data["id"]])
                    #     print("filtered_data["updated_at"]: %s" % filtered_data["updated_at"])
                    if "updated_at" in filtered_data:
                        # Only update if records says it's newer...
                        if filtered_data[
                                "updated_at"] == self.db_existing_data[
                                    current_table][
                                        filtered_data["id"]]["updated_at"]:
                            self.db_completed_ids[current_table].append(
                                data["id"])
                            continue
                    elif "created_at" in filtered_data:
                        if filtered_data[
                                "created_at"] == self.db_existing_data[
                                    current_table][
                                        filtered_data["id"]]["created_at"]:
                            self.db_completed_ids[current_table].append(
                                data["id"])
                            continue

                    item_to_update = OrderedDict()
                    for col, col_info in table_cols.items():
                        if col in filtered_data:
                            item_to_update[col] = filtered_data[col]
                        else:
                            item_to_update[col] = None

                    # print("updating table: %s" % current_table)
                    self.db_update_data[current_table].append(item_to_update)
                    self.db_completed_ids[current_table].append(
                        filtered_data["id"])
                    # self._LocalDB.dbconfig.update(current_table, filtered_data, where=["id = ?", data["id"]] )
                    if "updated_at" in config_data["functions"]:
                        klass = getattr(library,
                                        config_data["functions"]["added"])
                        klass(filtered_data, True)

            # print("calling insertmany for config_data %s" % to_save)
            # if len(to_save) > 0:
            #     yield self._LocalDB.insert_many(current_table, to_save)
        except Exception as e:
            logger.error(
                "-------==(Error: While saving new config data)==--------")
            logger.error(
                "--------------------------------------------------------")
            logger.error("{error}", error=sys.exc_info())
            logger.error(
                "---------------==(Traceback)==--------------------------")
            logger.error("{trace}", trace=traceback.print_exc(file=sys.stdout))
            logger.error(
                "--------------------------------------------------------")

            logger.warn("Got exception: {e}", e=e)

    def get_config_item(self, library):
        """
        Simple lookup function to get config_item
        :param library:
        :return:
        """
        return CONFIG_ITEM_MAP[library]

    def get_system_configs(self):
        """
        On startup, this is called to request all configuration items.

        TODO: After the heavy development cycle is done, a check will be placed here to determine whether the gateway
        can reach the servers or not. If it can't, and we have some configuration data that isn't too old,
        we can start without talking to the servers.

        :return:
        """
        self.__doing_full_configs = True
        self._full_download_start_at = time()
        self._getAllConfigsLoggerLoop = LoopingCall(self._show_pending_configs)
        self._getAllConfigsLoggerLoop.start(
            3, False
        )  # Log pending downloads every 3 seconds; False = don't fire immediately
        self._checkProcessQueueLoop.start(0.5, False)

        logger.info(
            "Requesting system configurations from server. This can take a few seconds."
        )

        allCommands = [
            "get_categories",
            "get_gateway_cluster",
            "get_gateway_commands",
            "get_gateway_device_command_inputs",
            "get_gateway_device_types",
            "get_gateway_device_type_commands",
            "get_gateway_devices",  # Includes device variable groups/fields/data
            "get_gateway_dns_name",
            "get_gateway_input_types",
            "get_gateway_locations",
            "get_gateway_modules",  # Includes device variable groups/fields/data
            "get_gateway_nodes",
            "get_gateway_users",

            # "get_gateway_configs",
            # "getGatewayUserTokens",
        ]
        self.inital_config_items_requested = len(allCommands)
        cur_time = int(time())
        for item in allCommands:
            logger.debug("sending command: {item}", item=item)

            # Request only changes since the last successful fetch of this item.
            body = {
                "since": self.parent._Configs.get("amqpyombo_config_times", item, 0),
            }

            # Record when we asked, so the next startup only requests newer changes.
            self.parent._Configs.set("amqpyombo_config_times", item, cur_time)

            # headers= {
            #     "config_item": item,
            # }
            request = self.generate_config_request(
                # headers=headers,
                body=body,
                request_type=item,
            )

            self.parent.publish(**request)
            self._append_full_download_queue(item)

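A minimal sketch (outside the class, with a hypothetical storage dict standing in for _Configs) of the incremental-fetch pattern used above: each config item remembers when it was last requested, and only changes since that time are asked for.

from time import time

_config_times = {}  # hypothetical stand-in for the persisted "amqpyombo_config_times" section

def build_incremental_body(item):
    """Return a request body asking only for changes since the last fetch."""
    since = _config_times.get(item, 0)  # 0 means "send everything" on the first run
    _config_times[item] = int(time())   # remember when we asked, for next time
    return {"since": since}

# First call requests everything, later calls only deltas:
# build_incremental_body("get_gateway_devices")  -> {"since": 0}
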
    def item_purged(self, config_item, id):
        body = {"id": id}

        headers = {
            "request_type": "config",
            "config_item": f"purged_{config_item}",
        }

        request_msg = self.parent.generate_message_request(
            exchange_name="ysrv.e.gw_config",
            source="yombo.gateway.lib.amqpyobo",
            destination="yombo.server.configs",
            headers=headers,
            body=body,
        )
        request_msg["correlation_persistent"] = False
        self.parent.publish(**request_msg)

    def generate_config_request(self,
                                headers=None,
                                body=None,
                                request_type=None):
        """
        Generate a request specific to this library - configs!

        :param headers: Optional headers to include with the request.
        :param body: Request body, typically containing a "since" timestamp.
        :param request_type: Which config item to request, e.g. "get_gateway_devices".
        :return: A message dict suitable for passing to publish().
        """
        request_msg = self.parent.generate_message_request(
            exchange_name="ysrv.e.gw_config",
            source="yombo.gateway.lib.amqpyobo",
            destination="yombo.server.configs",
            request_type=request_type,
            headers=headers,
            body=body,
        )
        # logger.debug("response: {request_msg}", request_msg=request_msg)
        request_msg["correlation_persistent"] = False
        return request_msg

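As a usage sketch, this is roughly how get_system_configs() drives the helper above; `handler` stands for an instance of this class, and the publish(**request) call mirrors the calling code in this section:

def request_one_item(handler, item="get_gateway_devices"):
    # Build the AMQP request for a single config item and send it.
    request = handler.generate_config_request(
        body={"since": 0},  # full fetch; pass a timestamp for incremental updates
        request_type=item,
    )
    handler.parent.publish(**request)
    handler._append_full_download_queue(item)  # track it until the reply arrives
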
    def _append_full_download_queue(self, table):
        """
        Adds an item to the pending download queue.

        Each entry is removed by _remove_full_download_dueue once the
        corresponding config data has been returned and processed.
        """
        if table not in self.__pending_updates:
            logger.debug("Adding table to request queue: {table}", table=table)
            self.__pending_updates[table] = {
                "status": "requested",
                "request_at": time(),
            }

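A standalone sketch of the queue bookkeeping implemented by _append_full_download_queue above and _remove_full_download_dueue below (the dictionary here is illustrative, not the library's internal attribute): every requested item is tracked with a status and timestamp, and the download phase is finished when the tracker drains.

from time import time

pending_updates = {}

def mark_requested(item):
    # Track the item until its config data comes back from the server.
    if item not in pending_updates:
        pending_updates[item] = {"status": "requested", "request_at": time()}

def mark_done(item):
    # Drop the item; when nothing is left, the full download is complete.
    pending_updates.pop(item, None)
    return len(pending_updates) == 0

mark_requested("get_gateway_devices")
assert mark_done("get_gateway_devices") is True
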
    @inlineCallbacks
    def _remove_full_download_dueue(self, table):
        # logger.info("Removing table from request queue: {table}", table=table)
        # logger.info("Configs pending: {pendingUpdates}", pendingUpdates=self.__pending_updates)
        if table in self.__pending_updates:
            del self.__pending_updates[table]
        else:
            return
        # logger.debug("Configs pending: {pendingUpdates}", pendingUpdates=self.__pending_updates)
        self.init_startup_count = 0
        if len(self.__pending_updates) == 0 and self.__doing_full_configs is True:
            self.__doing_full_configs = False
            self._getAllConfigsLoggerLoop.stop()
            self._checkProcessQueueLoop.stop()

            # now we do all actual database updates, deletes, inserts...
            # self.db_existing_data = {}  # used to keep track of items to add, update, or delete
            # self.db_delete_ids = {}  # [table][row_id]
            # self.db_insert_data = {}  # [table][row_id] = orderedDicts
            # self.db_update_data = {}  # [table][row_id] = Dictionaries

            for table, records in self.db_delete_ids.items():
                # Remove everything the server sent from the local snapshot; whatever
                # remains was not mentioned and should be deleted locally.
                for completed_id in self.db_completed_ids[table]:
                    if completed_id in self.db_existing_data[table]:
                        del self.db_existing_data[table][completed_id]
                for existing_id in list(self.db_existing_data[table].keys()):
                    # print("Left over items: should be deleted.... %s: %s" % (table, existing_id))
                    records.append(existing_id)
                if len(records) > 0:
                    # print("%s should be delete_many these records: %s" % (table, records))
                    yield self._LocalDB.delete_many(table, records)
            self.db_existing_data = {}

            for table, records in self.db_update_data.items():
                if len(records) > 0:
                    # print("%s should be update_many records: %s" % (table, records))
                    yield self._LocalDB.update_many(table, records, "id")
            self.db_update_data = {}

            for table, records in self.db_insert_data.items():
                if len(records) > 0:
                    # print("%s should be insert_many these records: %s" % (table, records))
                    yield self._LocalDB.insert_many(table, records)
            self.db_insert_data = {}

            # Give the DB some breathing room before finishing startup.
            reactor.callLater(0.1, self.done_with_startup_downloads)
        else:
            if self._getAllConfigsLoggerLoop is not None and self._getAllConfigsLoggerLoop.running:
                self._getAllConfigsLoggerLoop.reset()

            if self.check_download_done_calllater is not None and self.check_download_done_calllater.active():
                self.check_download_done_calllater.reset(30)
            else:
                self.check_download_done_calllater = reactor.callLater(
                    30, self.check_download_done)

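When the pending queue above drains, the method reconciles the pre-download snapshot against what the server actually sent: rows the server never mentioned are deleted, then the batched updates and inserts are flushed. A small pure-Python sketch of the delete reconciliation step, with illustrative names:

def reconcile_deletes(existing_rows, completed_ids):
    """Return ids that exist locally but were not sent by the server.

    existing_rows: dict of id -> row, snapshotted before the download.
    completed_ids: ids the server did send (updated, inserted, or unchanged).
    """
    remaining = dict(existing_rows)
    for row_id in completed_ids:
        remaining.pop(row_id, None)
    return list(remaining)  # these rows should be deleted locally

# Row "c" was never mentioned by the server, so it is slated for deletion:
# reconcile_deletes({"a": {}, "b": {}, "c": {}}, ["a", "b"])  -> ["c"]
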
    def done_with_startup_downloads(self):
        """
        Called when configuration downloads are complete.
        :return:
        """
        self.parent._Configs.set("amqpyombo", "lastcomplete", int(time()))
        if self.init_defer.called is False:
            # Only fire once; calling back an already-fired Deferred would raise an error.
            self.init_defer.callback(1)
            # self.parent._Notifications.add(
            #     {'title': 'Configs Downloaded', 'message': 'Downloaded system configurations from Yombo server.',
            #      'timeout': 300, 'source': 'Yombo Gateway System'})

    def _show_pending_configs(self):
        waitingTime = time() - self._full_download_start_at
        logger.debug(
            "Waited {waitingTime} for startup; pending these configurations:",
            waitingTime=waitingTime)
        count_pending = 0
        currently_processing = ""
        display_waiting_on = []
        for key in list(self.__pending_updates):
            count_pending += 1
            logger.debug("Config: {config}, Status: {status}",
                         config=key,
                         status=self.__pending_updates[key]["status"])
            if self.__pending_updates[key]["status"] == "processing":
                currently_processing = key
                self.check_download_done_calllater.reset(30)
            display_waiting_on.append(key)

        if currently_processing == "":
            logger.info(
                "Waiting for the following items from server: {the_list}",
                the_list=", ".join(display_waiting_on))
        else:
            logger.info(
                "Processing configurations, {count_done} of {number_requested}. Currently processing: {currently_processing}",
                currently_processing=currently_processing,
                count_done=self.inital_config_items_requested - count_pending,
                number_requested=self.inital_config_items_requested)

    def _local_log(self, level, location, msg=""):
        logit = getattr(logger, level)
        logit("In {location} : {msg}", location=location, msg=msg)
Example #10
0
class C2SServerProtocol(InternalServerProtocol):
    pinger = None
    ping_txid = None

    def __init__(self, config):
        txprotobuf.Protocol.__init__(self, c2s)
        self.MAX_LENGTH = config["server"]["c2s.pack_size_max"]
        self.idler = LoopingCall(self.onIdle)
        if "reset" not in dir(self.idler):

            def reset(self):
                try:
                    self.idler.stop()
                except:
                    pass
                try:
                    self.idler.start(IDLE_DELAY, False)
                except:
                    pass

            self.idler.reset = reset

    def connectionMade(self):
        InternalServerProtocol.connectionMade(self)
        """
        TODO this breaks compatibility with Android client < 16
        info = InternalServerProtocol.connectionMade(self)
        # send serverinfo
        r = c2s.ServerInfo()
        r.version = info['version']
        r.client_protocol = info['client-protocol']
        r.fingerprint = info['fingerprint']
        r.network = info['network']
        r.supports.extend(info['supports'])
        self.sendBox(r)
        """
        # start idler
        self.idler.start(IDLE_DELAY, False)

    def connectionLost(self, reason=connectionDone):
        try:
            # stop idler
            self.idler.stop()
        except:
            pass
        try:
            # stop pinger
            self.pinger.stop()
        except:
            pass
        InternalServerProtocol.connectionLost(self, reason)

    def onIdle(self):
        self.service.idle()
        # send ping
        a = c2s.Ping()
        a.timestamp = long(time.time())
        self.ping_txid = self.sendBox(a)
        # ping is sent, wait for response
        if not self.pinger:
            self.pinger = reactor.callLater(PING_DELAY, self.pingTimeout)

    def pingTimeout(self):
        try:
            self.idler.stop()
        except:
            pass
        self.service.ping_timeout()
        self.pinger = None

    def boxReceived(self, data, tx_id=None):
        # reset idler
        try:
            self.idler.reset()
        except:
            pass

        # Dispatch on the protobuf message class name; r holds the optional reply.
        r = None
        name = data.__class__.__name__

        if name == "LoginRequest":
            r = c2s.LoginResponse()
            (r.status, userid) = self.service.login(
                str(tx_id), data.token, data.client_protocol, data.client_version, data.flags
            )
            if userid:
                r.user_id = userid

        elif name == "AuthenticateRequest":
            r = c2s.AuthenticateResponse()
            r.valid = self.service.authenticate(str(tx_id), data.token)

        elif name == "MessagePostRequest":
            if self.service.is_logged():
                r = c2s.MessagePostResponse()
                if len(data.recipient) > 0:
                    res = self.service.post_message(
                        str(tx_id), tuple(data.recipient), str(data.mime), tuple(data.flags), data.content
                    )
                    for userid, msgid in res.iteritems():
                        me = r.entry.add()
                        me.user_id = userid
                        if type(msgid) == str:
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_SUCCESS
                            me.message_id = msgid
                        elif msgid:
                            me.status = msgid
                        else:
                            me.status = c2s.MessagePostResponse.MessageSent.STATUS_ERROR

        elif name == "MessageAckRequest":
            if self.service.is_logged():
                r = c2s.MessageAckResponse()
                res = self.service.ack_message(str(tx_id), tuple(data.message_id))
                for _msgid in data.message_id:
                    msgid = str(_msgid)
                    e = r.entry.add()
                    e.message_id = msgid
                    try:
                        success = res[msgid]
                        if success:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_SUCCESS
                        else:
                            e.status = c2s.MessageAckResponse.Entry.STATUS_NOTFOUND
                    except:
                        import traceback

                        traceback.print_exc()
                        e.status = c2s.MessageAckResponse.Entry.STATUS_ERROR

        elif name == "RegistrationRequest":
            if not self.service.is_logged():
                r = c2s.RegistrationResponse()
                res = self.service.register_user(str(tx_id), str(data.username))
                r.status = res["status"]
                if "token" in res:
                    r.token = res["token"]
                if "sms_from" in res:
                    r.sms_from = res["sms_from"]
                if "email_from" in res:
                    r.email_from = res["email_from"]

        elif name == "ValidationRequest":
            if not self.service.is_logged():
                r = c2s.ValidationResponse()
                res, token = self.service.validate_user(str(tx_id), str(data.validation_code))
                r.status = res
                if token:
                    r.token = token

        elif name == "UserLookupRequest":
            if self.service.is_logged():
                # lookup can delay operations by a long time, so defer it
                def lookup_complete(found, tx_id, return_value=False):
                    r = c2s.UserLookupResponse()
                    for u in found:
                        e = r.entry.add()
                        e.user_id = u["userid"]
                        if "status" in u:
                            e.status = u["status"]
                        if "timestamp" in u:
                            e.timestamp = u["timestamp"]
                        if "timediff" in u:
                            e.timediff = u["timediff"]
                    if return_value:
                        return r
                    else:
                        self.sendBox(r, tx_id)

                # force generic users only
                found = self.service.lookup_users(tx_id, [str(x)[: utils.USERID_LENGTH] for x in data.user_id])
                if isinstance(found, defer.Deferred):
                    found.addCallback(lookup_complete, tx_id)
                else:
                    r = lookup_complete(found, tx_id, True)

        elif name == "ServerInfoRequest":
            r = c2s.ServerInfoResponse()
            info = self.service.serverinfo(str(tx_id), str(data.client_version), data.client_protocol)
            r.version = info["version"]
            r.client_protocol = info["client-protocol"]
            r.server_protocol = info["server-protocol"]
            r.fingerprint = info["fingerprint"]
            r.supports.extend(info["supports"])

        elif name == "UserInfoUpdateRequest":
            if self.service.is_logged():
                r = c2s.UserInfoUpdateResponse()
                flags = None
                if data.HasField("flags"):
                    flags = data.flags
                if data.HasField("status_message"):
                    status_msg = data.status_message.encode("utf-8")
                else:
                    status_msg = None
                if data.HasField("google_registration_id"):
                    google_regid = str(data.google_registration_id)
                else:
                    google_regid = None

                r.status = self.service.user_update(flags, status_msg, google_regid)

        elif name == "UserPresenceSubscribeRequest":
            if self.service.is_logged():
                r = c2s.UserPresenceSubscribeResponse()
                r.status = self.service.user_presence_subscribe(str(data.user_id), data.events)

        elif name == "Pong":
            # client has responded: check tx_id
            if self.ping_txid == tx_id:
                # reset ping timeout check
                try:
                    self.pinger.cancel()
                except:
                    pass
                self.pinger = None

        elif name == "ValidationCodeRequest":
            if self.service.is_logged():
                code = self.service.revalidate()
                r = c2s.ValidationCodeResponse()
                if type(code) == str:
                    r.status = c2s.ValidationCodeResponse.STATUS_SUCCESS
                    r.code = code
                else:
                    r.status = c2s.ValidationCodeResponse.STATUS_ERROR

        if r:
            self.sendBox(r, tx_id)

        # reset idler (again)
        try:
            self.idler.reset()
        except:
            pass
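The idler/pinger pair above (onIdle, pingTimeout, and the "Pong" branch of boxReceived) forms a keepalive: after IDLE_DELAY of silence a Ping is sent, and unless a matching Pong comes back within PING_DELAY the connection is treated as dead. Below is a self-contained Twisted sketch of the same pattern; the constants and the send/timeout hooks are illustrative, not the original project's API.

from twisted.internet import reactor
from twisted.internet.task import LoopingCall

IDLE_DELAY = 60  # seconds of silence before we ping the peer
PING_DELAY = 30  # seconds to wait for the matching Pong

class Keepalive(object):
    def __init__(self, send_ping, on_timeout):
        self.send_ping = send_ping    # callable that transmits a ping and returns its tx id
        self.on_timeout = on_timeout  # callable invoked when no Pong arrives in time
        self.idler = LoopingCall(self._idle)
        self.pinger = None
        self.ping_txid = None

    def start(self):
        self.idler.start(IDLE_DELAY, now=False)

    def activity(self):
        # Any received box counts as activity: push the next ping further out.
        self.idler.reset()

    def _idle(self):
        self.ping_txid = self.send_ping()
        if self.pinger is None:
            self.pinger = reactor.callLater(PING_DELAY, self._timeout)

    def pong(self, tx_id):
        # Cancel the pending timeout only if the Pong matches our last ping.
        if tx_id == self.ping_txid and self.pinger is not None:
            if self.pinger.active():
                self.pinger.cancel()
            self.pinger = None

    def _timeout(self):
        self.pinger = None
        self.idler.stop()
        self.on_timeout()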