Example #1
    def _do_sasl_handshake(self):
        req_klass = self._version_info.pick_best(SaslHandShakeRequest)

        sasl_handshake = req_klass(self._sasl_mechanism)
        response = yield from self.send(sasl_handshake)
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            error = error_type(self)
            self.close(reason=CloseReason.AUTH_FAILURE, exc=error)
            raise error

        if self._sasl_mechanism not in response.enabled_mechanisms:
            exc = Errors.UnsupportedSaslMechanismError(
                'Kafka broker does not support %s sasl mechanism. '
                'Enabled mechanisms are: %s' %
                (self._sasl_mechanism, response.enabled_mechanisms))
            self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
            raise exc

        assert self._sasl_mechanism == 'PLAIN'
        if self._security_protocol == 'SASL_PLAINTEXT':
            self.log.warning('Sending username and password in the clear')

        authenticator = self.authenticator_plain()

        auth_bytes = None
        while True:
            try:
                payload = authenticator.send(auth_bytes)
            except StopIteration:
                break

            if req_klass.API_VERSION == 0:
                auth_bytes = yield from self._send_sasl_token(payload)
            else:
                req_klass = self._version_info.pick_best(
                    SaslAuthenticateRequest)
                req = req_klass(payload)
                resp = yield from self.send(req)
                error_type = Errors.for_code(resp.error_code)
                if error_type is not Errors.NoError:
                    exc = error_type(resp.error_message)
                    self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
                    raise exc
                auth_bytes = resp.sasl_auth_bytes

        self.log.info('Authenticated as %s via PLAIN',
                      self._sasl_plain_username)
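
A minimal sketch of the generator that authenticator_plain() is assumed to
return (an illustration, not aiokafka's actual code): SASL PLAIN is a
single-token mechanism whose token is [authzid] NUL authcid NUL password
(RFC 4616), so one yield is enough to drive the send() loop above.

    def authenticator_plain_sketch(username, password):
        token = (b'\0' + username.encode('utf-8') +
                 b'\0' + password.encode('utf-8'))
        # The first send(None) primes the generator and returns the token;
        # the broker's reply is fed back via the next send(), which runs
        # the generator off its end and raises StopIteration.
        yield token

    auth = authenticator_plain_sketch('user', 'secret')
    payload = auth.send(None)  # b'\x00user\x00secret'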
Example #2
    def handle_response(self, resp):
        txn_manager = self._sender._txn_manager
        group_id = self._group_id

        error_type = Errors.for_code(resp.error_code)
        if error_type is Errors.NoError:
            log.debug("Successfully added consumer group %s to transaction",
                      group_id)
            txn_manager.consumer_group_added(group_id)
            return
        elif (error_type is CoordinatorNotAvailableError
              or error_type is NotCoordinatorError):
            self._sender._coordinator_dead(CoordinationType.TRANSACTION)
        elif (error_type is CoordinatorLoadInProgressError
              or error_type is ConcurrentTransactions):
            # We will just retry after backoff
            pass
        elif error_type is InvalidProducerEpoch:
            raise ProducerFenced()
        elif error_type is InvalidTxnState:
            raise error_type()
        elif error_type is TransactionalIdAuthorizationFailed:
            raise error_type(txn_manager.transactional_id)
        elif error_type is GroupAuthorizationFailedError:
            txn_manager.error_transaction(error_type(self._group_id))
            return
        else:
            log.error(
                "Could not add consumer group due to unexpected error: %s",
                error_type)
            raise error_type()

        return self._default_backoff
Example #3
    def _check_api_version_response(self, response):
        # The logic here is to check the list of supported request versions
        # in descending order. As soon as we find one that works, return it
        test_cases = [
            # format: (<broker version>, <api key>, <min required version>)
            ((2, 1, 0), MetadataRequest[0].API_KEY, 7),
            ((1, 1, 0), FetchRequest[0].API_KEY, 7),
            ((1, 0, 0), MetadataRequest[0].API_KEY, 5),
            ((0, 11, 0), MetadataRequest[0].API_KEY, 4),
            ((0, 10, 2), OffsetFetchRequest[0].API_KEY, 2),
            ((0, 10, 1), MetadataRequest[0].API_KEY, 2),
        ]

        error_type = Errors.for_code(response.error_code)
        assert error_type is Errors.NoError, "API version check failed"
        max_versions = {
            api_key: max_version
            for api_key, _, max_version in response.api_versions
        }
        # Get the best match of test cases
        for broker_version, api_key, version in test_cases:
            if max_versions.get(api_key, -1) >= version:
                return broker_version

        # We know that ApiVersionResponse is only supported in 0.10+
        # so if all else fails, choose that
        return (0, 10, 0)
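
All of these examples funnel broker error codes through Errors.for_code. A
hedged sketch of what that lookup is assumed to do (kafka-python builds a
similar table from the errno attribute of its error classes; the class
names below are illustrative):

    class KafkaError(Exception):
        retriable = False

    class NoError(KafkaError):
        errno = 0

    class UnknownError(KafkaError):
        errno = -1

    class OffsetOutOfRangeError(KafkaError):
        errno = 1

    _code_to_error = {cls.errno: cls for cls in KafkaError.__subclasses__()}

    def for_code(error_code):
        # Unknown codes resolve to a generic class rather than None, so
        # callers can always `raise error_type()`.
        return _code_to_error.get(error_code, UnknownError)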
Example #4
    def _check_api_version_response(self, response):
        # The logic here is to check the list of supported request versions
        # in descending order. As soon as we find one that works, return it
        test_cases = [
            # format (<broker version>, <needed struct>)
            # TODO Requires unreleased version of python-kafka
            # ((2, 6, 0), DescribeClientQuotasRequest[0]),
            ((2, 5, 0), DescribeAclsRequest_v2),
            ((2, 4, 0), ProduceRequest[8]),
            ((2, 3, 0), FetchRequest[11]),
            ((2, 2, 0), OffsetRequest[5]),
            ((2, 1, 0), FetchRequest[10]),
            ((2, 0, 0), FetchRequest[8]),
            ((1, 1, 0), FetchRequest[7]),
            ((1, 0, 0), MetadataRequest[5]),
            ((0, 11, 0), MetadataRequest[4]),
            ((0, 10, 2), OffsetFetchRequest[2]),
            ((0, 10, 1), MetadataRequest[2]),
        ]

        error_type = Errors.for_code(response.error_code)
        assert error_type is Errors.NoError, "API version check failed"
        max_versions = {
            api_key: max_version
            for api_key, _, max_version in response.api_versions
        }
        # Get the best match of test cases
        for broker_version, struct in test_cases:
            if max_versions.get(struct.API_KEY, -1) >= struct.API_VERSION:
                return broker_version

        # We know that ApiVersionResponse is only supported in 0.10+
        # so if all else fails, choose that
        return (0, 10, 0)
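
This variant keys the probe table on request structs instead of separate
(api_key, version) pairs, so each row is self-describing. A hedged sanity
check with a stubbed response (api key numbers follow the Kafka protocol:
Produce=0, Fetch=1, Metadata=3):

    class StubApiVersionResponse:
        error_code = 0
        # (api_key, min_version, max_version), as in ApiVersionResponse
        api_versions = [(0, 0, 7), (1, 0, 7), (3, 0, 5)]

    # With Fetch capped at v7 and Metadata at v5, the first row that
    # matches is ((1, 1, 0), FetchRequest[7]), so the checker would
    # report broker version (1, 1, 0).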
Example #5
    def _check_api_version_response(self, response):
        # The logic here is to check the list of supported request versions
        # in descending order. As soon as we find one that works, return it
        test_cases = [
            # format: (<broker version>, <api key>, <min required version>)
            ((1, 0, 0), MetadataRequest[0].API_KEY, 5),
            ((0, 11, 0), MetadataRequest[0].API_KEY, 4),
            ((0, 10, 2), OffsetFetchRequest[0].API_KEY, 2),
            ((0, 10, 1), MetadataRequest[0].API_KEY, 2),
        ]

        error_type = Errors.for_code(response.error_code)
        assert error_type is Errors.NoError, "API version check failed"
        max_versions = {
            api_key: max_version
            for api_key, _, max_version in response.api_versions
        }
        # Get the best match of test cases
        for broker_version, api_key, version in test_cases:
            if max_versions.get(api_key, -1) >= version:
                return broker_version

        # We know that ApiVersionResponse is only supported in 0.10+
        # so if all else fails, choose that
        return (0, 10, 0)
Example #6
    def handle_response(self, resp):
        txn_manager = self._sender._txn_manager
        group_id = self._group_id

        for topic, partitions in resp.errors:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)

                if error_type is Errors.NoError:
                    offset = self._offsets[tp].offset
                    log.debug(
                        "Offset %s for partition %s committed to group %s",
                        offset, tp, group_id)
                    txn_manager.offset_committed(tp, offset, group_id)
                elif (error_type is CoordinatorNotAvailableError
                      or error_type is NotCoordinatorError or
                      # Copied from Java. Not sure why it's only in this case
                      error_type is RequestTimedOutError):
                    self._sender._coordinator_dead(CoordinationType.GROUP)
                    return self._default_backoff
                elif (error_type is CoordinatorLoadInProgressError
                      or error_type is UnknownTopicOrPartitionError):
                    # We will just retry after backoff
                    return self._default_backoff
                elif error_type is InvalidProducerEpoch:
                    raise ProducerFenced()
                else:
                    log.error(
                        "Could not commit offset for partition %s due to "
                        "unexpected error: %s", partition, error_type)
                    raise error_type()
Example #7
    def handle_response(self, resp):
        txn_manager = self._sender._txn_manager
        error_type = Errors.for_code(resp.error_code)

        if error_type is Errors.NoError:
            txn_manager.complete_transaction()
            return
        elif (error_type is CoordinatorNotAvailableError or
                error_type is NotCoordinatorError):
            self._sender._coordinator_dead(CoordinationType.TRANSACTION)
        elif (error_type is CoordinatorLoadInProgressError or
                error_type is ConcurrentTransactions):
            # We will just retry after backoff
            pass
        elif error_type is InvalidProducerEpoch:
            raise ProducerFenced()
        elif error_type is InvalidTxnState:
            raise error_type()
        else:
            log.error(
                "Could not end transaction due to unexpected error: %s",
                error_type)
            raise error_type()

        return self._default_backoff
Example #8
    def handle_response(self, resp):
        txn_manager = self._sender._txn_manager
        error_type = Errors.for_code(resp.error_code)
        if error_type is Errors.NoError:
            log.debug(
                "Successfully found PID=%s EPOCH=%s for Producer %s",
                resp.producer_id, resp.producer_epoch,
                self._sender.client._client_id)
            self._sender._txn_manager.set_pid_and_epoch(
                resp.producer_id, resp.producer_epoch)
            return
        elif (error_type is CoordinatorNotAvailableError or
                error_type is NotCoordinatorError):
            self._sender._coordinator_dead(CoordinationType.TRANSACTION)
        elif (error_type is CoordinatorLoadInProgressError or
                error_type is ConcurrentTransactions):
            pass
        elif error_type is TransactionalIdAuthorizationFailed:
            raise error_type(txn_manager.transactional_id)
        else:
            log.error(
                "Unexpected error during InitProducerIdRequest: %s",
                error_type)
            raise error_type()

        return self._default_backoff
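
These handle_response methods share a convention: returning None means the
request is finished (success, or a terminal error raised inside), while
returning a number asks the caller to retry after that many seconds. A
minimal sketch of the driver loop this implies (names are illustrative,
not aiokafka's actual code):

    import asyncio

    async def run_with_retries(send_request, handler):
        while True:
            resp = await send_request()
            backoff = handler.handle_response(resp)
            if backoff is None:
                return
            await asyncio.sleep(backoff)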
Example #9
    async def coordinator_lookup(self, coordinator_type, coordinator_key):
        """ Lookup which node in the cluster is the coordinator for a certain
        role (Transaction coordinator or Group coordinator atm.)
        NOTE: Client keeps track of all coordination nodes separately, as they
        all have different sockets and ids.
        """

        node_id = self.get_random_node()
        assert node_id is not None, "Did we not perform bootstrap?"

        log.debug("Sending FindCoordinator request for key %s to broker %s",
                  coordinator_key, node_id)

        if self.api_version > (0, 11):
            request = FindCoordinatorRequest[1](coordinator_key,
                                                coordinator_type)
        else:
            # Group coordination only
            assert coordinator_type == CoordinationType.GROUP, \
                "No transactions for older brokers"
            request = FindCoordinatorRequest[0](coordinator_key)

        resp = await self.send(node_id, request)
        log.debug("Received group coordinator response %s", resp)
        error_type = Errors.for_code(resp.error_code)
        if error_type is not Errors.NoError:
            err = error_type()
            raise err
        self.cluster.add_coordinator(resp.coordinator_id,
                                     resp.host,
                                     resp.port,
                                     rack=None,
                                     purpose=(coordinator_type,
                                              coordinator_key))
        return resp.coordinator_id
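
A hedged usage sketch: resolve the group coordinator once, then route all
coordination requests to that node until it is marked dead (the client
object and group id are assumed context):

    node_id = await client.coordinator_lookup(
        CoordinationType.GROUP, 'my-consumer-group')
    # Join/Sync/Commit requests now go to node_id until coordinator_dead()
    # forces a fresh lookup.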
Example #10
    def coordinator_lookup(self, coordinator_type, coordinator_key):
        """ Lookup which node in the cluster is the coordinator for a certain
        role (Transaction coordinator or Group coordinator atm.)
        NOTE: Client keeps track of all coordination nodes separately, as they
        all have different sockets and ids.
        """

        node_id = self.get_random_node()
        assert node_id is not None, "Did we not perform bootstrap?"

        log.debug(
            "Sending FindCoordinator request for key %s to broker %s",
            coordinator_key, node_id)

        if self.api_version > (0, 11):
            request = FindCoordinatorRequest[1](
                coordinator_key, coordinator_type)
        else:
            # Group coordination only
            assert coordinator_type == CoordinationType.GROUP, \
                "No transactions for older brokers"
            request = FindCoordinatorRequest[0](coordinator_key)

        resp = yield from self.send(node_id, request)
        log.debug("Received group coordinator response %s", resp)
        error_type = Errors.for_code(resp.error_code)
        if error_type is not Errors.NoError:
            err = error_type()
            raise err
        self.cluster.add_coordinator(
            resp.coordinator_id, resp.host, resp.port, rack=None,
            purpose=(coordinator_type, coordinator_key))
        return resp.coordinator_id
Example #11
    def handle_response(self, response):
        for topic, partitions in response.topics:
            for partition_info in partitions:
                global_error = None
                log_start_offset = None
                if response.API_VERSION < 2:
                    partition, error_code, offset = partition_info
                    # Mimic CREATE_TIME to use the user-provided timestamp
                    timestamp = -1
                elif 2 <= response.API_VERSION <= 4:
                    partition, error_code, offset, timestamp = partition_info
                elif 5 <= response.API_VERSION <= 7:
                    (
                        partition, error_code, offset, timestamp,
                        log_start_offset
                    ) = partition_info
                else:
                    # the ignored parameter is record_error of type
                    # list[(batch_index: int, error_message: str)]
                    (
                        partition, error_code, offset, timestamp,
                        log_start_offset, _, global_error
                    ) = partition_info
                tp = TopicPartition(topic, partition)
                error = Errors.for_code(error_code)
                batch = self._batches.get(tp)
                if batch is None:
                    continue

                if error is Errors.NoError:
                    batch.done(offset, timestamp, log_start_offset)
                elif error is DuplicateSequenceNumber:
                    # If we have received a duplicate sequence error,
                    # it means that the sequence number has advanced
                    # beyond the sequence of the current batch, and we
                    # haven't retained batch metadata on the broker to
                    # return the correct offset and timestamp.
                    #
                    # The only thing we can do is to return success to
                    # the user and not return a valid offset and
                    # timestamp.
                    batch.done(offset, timestamp, log_start_offset)
                elif not self._can_retry(error(), batch):
                    if error is InvalidProducerEpoch:
                        exc = ProducerFenced()
                    elif error is TopicAuthorizationFailedError:
                        exc = error(topic)
                    else:
                        exc = error()
                    batch.failure(exception=exc)
                else:
                    log.warning(
                        "Got error produce response on topic-partition"
                        " %s, retrying. Error: %s", tp, global_error or error)
                    # Ok, we can retry this batch
                    if getattr(error, "invalid_metadata", False):
                        self._client.force_metadata_update()
                    self._to_reenqueue.append(batch)
Example #12
    def handle_response(self, resp):
        txn_manager = self._sender._txn_manager

        unauthorized_topics = set()
        for topic, partitions in resp.errors:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)

                if error_type is Errors.NoError:
                    log.debug("Added partition %s to transaction", tp)
                    txn_manager.partition_added(tp)
                elif (error_type is CoordinatorNotAvailableError
                      or error_type is NotCoordinatorError):
                    self._sender._coordinator_dead(
                        CoordinationType.TRANSACTION)
                    return self._default_backoff
                elif error_type is ConcurrentTransactions:
                    # See KAFKA-5477: there is a window between the commit
                    # and the actual transaction marker write that produces
                    # this ConcurrentTransactions error. We don't want the
                    # 100ms latency in that case.
                    if not txn_manager.txn_partitions:
                        return BACKOFF_OVERRIDE
                    else:
                        return self._default_backoff
                elif (error_type is CoordinatorLoadInProgressError
                      or error_type is UnknownTopicOrPartitionError):
                    return self._default_backoff
                elif error_type is InvalidProducerEpoch:
                    raise ProducerFenced()
                elif (error_type is InvalidProducerIdMapping
                      or error_type is InvalidTxnState):
                    raise error_type()
                elif error_type is TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type is OperationNotAttempted:
                    pass
                elif error_type is TransactionalIdAuthorizationFailed:
                    raise error_type(txn_manager.transactional_id)
                else:
                    log.error(
                        "Could not add partition %s due to unexpected error:"
                        " %s", partition, error_type)
                    raise error_type()
        if unauthorized_topics:
            txn_manager.error_transaction(
                TopicAuthorizationFailedError(unauthorized_topics))
        return
Example #13
    def _send_req(self, node_id, request, *, group):
        """ Send a request to a Kafka node and mark the coordinator as
        `dead` on error.
        """
        try:
            resp = yield from self._client.send(node_id, request, group=group)
        except Errors.KafkaError as err:
            log.error(
                'Error sending %s to node %s [%s] -- marking coordinator dead',
                request.__class__.__name__, node_id, err)
            self.coordinator_dead()
            raise err
        else:
            if not hasattr(resp, 'error_code'):
                return resp
            error_type = Errors.for_code(resp.error_code)
            if error_type is Errors.NoError:
                return resp
            else:
                raise error_type()
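
Since _send_req raises the mapped error class for any non-zero error code,
callers inside the coordinator are assumed to catch and classify. A sketch,
relying on the retriable attribute kafka-python uses to mark transient
broker errors:

    try:
        resp = yield from self._send_req(
            node_id, request, group=ConnectionGroup.COORDINATION)
    except Errors.KafkaError as err:
        if not getattr(err, 'retriable', False):
            raise
        # Transient error: back off, then resend the request.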
Example #14
    def _do_init_pid(self):
        init_pid_req = InitProducerIdRequest[0](
            transactional_id=self._transactional_id,
            transaction_timeout_ms=self._transaction_timeout_ms)

        node_id = self.client.get_random_node()
        try:
            resp = yield from self.client.send(node_id, init_pid_req)
        except KafkaError as err:
            log.debug("Could not send InitProducerIdRequest: %r", err)
            return False

        error = Errors.for_code(resp.error_code)
        if error is Errors.NoError:
            self._txn_manager.set_pid_and_epoch(resp.producer_id,
                                                resp.producer_epoch)
            # Just in case we got bad values from broker
            return self._txn_manager.has_pid()
        else:
            log.debug("Got an error for InitProducerIdRequest: %r", error)
            return False
Example #15
    def _proc_offsets_fetch_request(self, node_id, request):
        response = yield from self._send_req(
            node_id, request, group=ConnectionGroup.COORDINATION)
        offsets = {}
        for topic, partitions in response.topics:
            for partition, offset, metadata, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is not Errors.NoError:
                    error = error_type()
                    log.debug("Error fetching offset for %s: %s", tp, error)
                    if error_type is Errors.GroupLoadInProgressError:
                        # just retry
                        raise error
                    elif error_type is Errors.NotCoordinatorForGroupError:
                        # re-discover the coordinator and retry
                        self.coordinator_dead()
                        raise error
                    elif error_type in (Errors.UnknownMemberIdError,
                                        Errors.IllegalGenerationError):
                        # need to re-join group
                        self._subscription.mark_for_reassignment()
                        raise error
                    elif error_type is Errors.UnknownTopicOrPartitionError:
                        log.warning(
                            "OffsetFetchRequest -- unknown topic %s", topic)
                        continue
                    else:
                        log.error("Unknown error fetching offsets for %s: %s",
                                  tp, error)
                        raise error
                elif offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    offsets[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug(
                        "No committed offset for partition %s", tp)

        return offsets
Example #16
    def _proc_fetch_request(self, assignment, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        except asyncio.CancelledError:
            # Either `close()` or partition unassigned. Either way the result
            # is no longer of interest.
            return False

        if not assignment.active:
            log.debug(
                "Discarding fetch response since the assignment changed during"
                " fetch")
            return False

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, *part_data in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                fetch_offset = fetch_offsets[tp]
                tp_state = assignment.state_value(tp)
                if not tp_state.has_valid_position or \
                        tp_state.position != fetch_offset:
                    log.debug(
                        "Discarding fetch response for partition %s "
                        "since its offset %s does not match the current "
                        "position", tp, fetch_offset)
                    continue

                if error_type is Errors.NoError:
                    tp_state.highwater = highwater

                    # part_data also contains lso, aborted_transactions.
                    # message_set is last
                    records = MemoryRecords(part_data[-1])
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp,
                            fetch_offset)

                        message_iterator = self._unpack_records(tp, records)
                        self._records[tp] = FetchResult(
                            tp,
                            message_iterator=message_iterator,
                            assignment=assignment,
                            backoff=self._prefetch_backoff,
                            fetch_offset=fetch_offset,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the user.
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.", tp,
                            fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        tp_state.consumed_to(tp_state.position + 1)
                        needs_wakeup = True

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    if self._default_reset_strategy != \
                            OffsetResetStrategy.NONE:
                        tp_state.await_reset(self._default_reset_strategy)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warning("Not authorized to read from topic %s.",
                                tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup
Example #17
    def _proc_offset_request(self, node_id, topic_data):
        if self._client.api_version < (0, 10, 1):
            version = 0
            # Version 0 had another field `max_offsets`, set it to `1`
            for topic, part_data in topic_data.items():
                topic_data[topic] = [(part, ts, 1) for part, ts in part_data]
        else:
            version = 1
        request = OffsetRequest[version](-1, list(topic_data.items()))

        response = yield from self._client.send(node_id, request)

        res_offsets = {}
        for topic, part_data in response.topics:
            for part, error_code, *partition_info in part_data:
                partition = TopicPartition(topic, part)
                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    if response.API_VERSION == 0:
                        offsets = partition_info[0]
                        assert len(offsets) <= 1, \
                            'Expected OffsetResponse with one offset'
                        if offsets:
                            offset = offsets[0]
                            log.debug(
                                "Handling v0 ListOffsetResponse response for "
                                "%s. Fetched offset %s", partition, offset)
                            res_offsets[partition] = (offset, None)
                        else:
                            res_offsets[partition] = (UNKNOWN_OFFSET, None)
                    else:
                        timestamp, offset = partition_info
                        log.debug(
                            "Handling ListOffsetResponse response for "
                            "%s. Fetched offset %s, timestamp %s", partition,
                            offset, timestamp)
                        res_offsets[partition] = (offset, timestamp)
                elif error_type is Errors.UnsupportedForMessageFormatError:
                    # The broker-side message format predates 0.10.0;
                    # we simply put None in the response.
                    log.debug(
                        "Cannot search by timestamp for partition %s "
                        "because the message format version is before "
                        "0.10.0", partition)
                elif error_type is Errors.NotLeaderForPartitionError:
                    log.debug(
                        "Attempt to fetch offsets for partition %s "
                        "failed "
                        "due to obsolete leadership information, retrying.",
                        partition)
                    raise error_type(partition)
                elif error_type is Errors.UnknownTopicOrPartitionError:
                    log.warning(
                        "Received unknown topic or partition error in "
                        "ListOffset request for partition %s. The "
                        "topic/partition may not exist or the user may not "
                        "have Describe access to it.", partition)
                    raise error_type(partition)
                else:
                    log.warning(
                        "Attempt to fetch offsets for partition %s failed due "
                        "to: %s", partition, error_type)
                    raise error_type(partition)
        return res_offsets
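
A hedged usage sketch of the request side: topic_data maps topic names to
(partition, timestamp) pairs, and the ListOffsets API reserves timestamp
-1 for the latest offset and -2 for the earliest:

    topic_data = {'my-topic': [(0, -1), (1, -1)]}  # latest offsets
    offsets = yield from self._proc_offset_request(node_id, topic_data)
    # offsets maps TopicPartition('my-topic', 0) -> (offset, timestamp)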
Example #18
    def _proc_fetch_request(self, assignment, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        except asyncio.CancelledError:
            # Either `close()` or partition unassigned. Either way the result
            # is no longer of interest.
            return False

        if not assignment.active:
            log.debug(
                "Discarding fetch response since the assignment changed during"
                " fetch")
            return False

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, *part_data in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                fetch_offset = fetch_offsets[tp]
                tp_state = assignment.state_value(tp)
                if not tp_state.has_valid_position or \
                        tp_state.position != fetch_offset:
                    log.debug(
                        "Discarding fetch response for partition %s "
                        "since its offset %s does not match the current "
                        "position", tp, fetch_offset)
                    continue

                if error_type is Errors.NoError:
                    if request.API_VERSION >= 4:
                        aborted_transactions = part_data[-2]
                        lso = part_data[-3]
                    else:
                        aborted_transactions = None
                        lso = None
                    tp_state.highwater = highwater
                    tp_state.lso = lso

                    # part_data also contains lso, aborted_transactions.
                    # message_set is last
                    records = MemoryRecords(part_data[-1])
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list",
                            tp, fetch_offset)

                        partition_records = PartitionRecords(
                            tp, records, aborted_transactions, fetch_offset,
                            self._key_deserializer, self._value_deserializer,
                            self._check_crcs, self._isolation_level)

                        self._records[tp] = FetchResult(
                            tp, partition_records=partition_records,
                            assignment=assignment,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the user.
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.",
                            tp, fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        tp_state.consumed_to(tp_state.position + 1)
                        needs_wakeup = True

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    if self._default_reset_strategy != \
                            OffsetResetStrategy.NONE:
                        tp_state.await_reset(self._default_reset_strategy)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warning(
                        "Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup
Example #19
    def _send_produce_req(self, node_id, batches):
        """ Create produce request to node
        If producer configured with `retries`>0 and produce response contain
        "failed" partitions produce request for this partition will try
        resend to broker `retries` times with `retry_timeout_ms` timeouts.

        Arguments:
            node_id (int): kafka broker identifier
            batches (dict): dictionary of {TopicPartition: MessageBatch}
        """
        t0 = self._loop.time()

        topics = collections.defaultdict(list)
        for tp, batch in batches.items():
            topics[tp.topic].append((tp.partition, batch.get_data_buffer()))

        if self.client.api_version >= (0, 10):
            version = 2
        elif self.client.api_version == (0, 9):
            version = 1
        else:
            version = 0

        request = ProduceRequest[version](required_acks=self._acks,
                                          timeout=self._request_timeout_ms,
                                          topics=list(topics.items()))

        reenqueue = []
        try:
            response = yield from self.client.send(node_id, request)
        except KafkaError as err:
            log.warning("Got error produce response: %s", err)
            if getattr(err, "invalid_metadata", False):
                self.client.force_metadata_update()

            for batch in batches.values():
                if not self._can_retry(err, batch):
                    batch.failure(exception=err)
                else:
                    reenqueue.append(batch)
        else:
            # noacks, just mark batches as "done"
            if request.required_acks == 0:
                for batch in batches.values():
                    batch.done_noack()
            else:
                for topic, partitions in response.topics:
                    for partition_info in partitions:
                        if response.API_VERSION < 2:
                            partition, error_code, offset = partition_info
                            # Mimic CREATE_TIME to use the user-provided timestamp
                            timestamp = -1
                        else:
                            partition, error_code, offset, timestamp = \
                                partition_info
                        tp = TopicPartition(topic, partition)
                        error = Errors.for_code(error_code)
                        batch = batches.pop(tp, None)
                        if batch is None:
                            continue

                        if error is Errors.NoError:
                            batch.done(offset, timestamp)
                        elif not self._can_retry(error(), batch):
                            batch.failure(exception=error())
                        else:
                            log.warning(
                                "Got error produce response on topic-partition"
                                " %s, retrying. Error: %s", tp, error)
                            # Ok, we can retry this batch
                            if getattr(error, "invalid_metadata", False):
                                self.client.force_metadata_update()
                            reenqueue.append(batch)

        if reenqueue:
            # Wait out the retry backoff before re-enqueueing
            yield from asyncio.sleep(self._retry_backoff, loop=self._loop)

            for batch in reenqueue:
                self._message_accumulator.reenqueue(batch)
            # If some error triggered a metadata refresh, we have to wait
            # for it to finish before trying again
            yield from self.client._maybe_wait_metadata()

        # If this node's batches were processed in less than the linger
        # time, wait out the remaining time
        sleep_time = self._linger_time - (self._loop.time() - t0)
        if sleep_time > 0:
            yield from asyncio.sleep(sleep_time, loop=self._loop)

        self._in_flight.remove(node_id)
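
The retry decision above is delegated to _can_retry. A hedged sketch of
such a predicate, assuming batches track their own delivery deadline via a
hypothetical expired() method and errors carry a retriable flag
(illustrative, not aiokafka's actual code):

    def _can_retry(self, error, batch):
        # A batch past its delivery timeout is never retried, however
        # benign the error. expired() is an assumed helper.
        if batch.expired():
            return False
        return getattr(error, 'retriable', False)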
Example #20
    def commit_offsets(self, offsets):
        """Commit specific offsets asynchronously.

        Arguments:
            offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

        Raises error on failure
        """
        self._subscription.needs_fetch_committed_offsets = True
        if not offsets:
            log.debug('No offsets to commit')
            return True

        if (yield from self.coordinator_unknown()):
            raise Errors.GroupCoordinatorNotAvailableError()
        node_id = self.coordinator_id

        # create the offset commit request
        offset_data = collections.defaultdict(list)
        for tp, offset in offsets.items():
            offset_data[tp.topic].append(
                (tp.partition, offset.offset, offset.metadata))

        request = OffsetCommitRequest(
            self.group_id, self.generation, self.member_id,
            OffsetCommitRequest.DEFAULT_RETENTION_TIME,
            [(topic, tp_offsets) for topic, tp_offsets in offset_data.items()])

        log.debug("Sending offset-commit request with %s for group %s to %s",
                  offsets, self.group_id, node_id)

        response = yield from self._send_req(
            node_id, request, group=ConnectionGroup.COORDINATION)

        unauthorized_topics = set()
        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug("Committed offset %s for partition %s", offset,
                              tp)
                    if self._subscription.is_assigned(tp):
                        partition = self._subscription.assignment[tp]
                        partition.committed = offset.offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("OffsetCommit failed for group %s - %s",
                              self.group_id, error_type.__name__)
                    raise error_type()
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.info(
                        "OffsetCommit failed for group %s on partition %s"
                        " due to %s, will retry", self.group_id, tp,
                        error_type.__name__)
                    raise error_type()
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.info(
                        "OffsetCommit failed for group %s because group is"
                        " initializing (%s), will retry", self.group_id,
                        error_type.__name__)
                    raise error_type()
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.info(
                        "OffsetCommit failed for group %s due to a"
                        " coordinator error (%s), will find new coordinator"
                        " and retry", self.group_id, error_type.__name__)
                    self.coordinator_dead()
                    raise error_type()
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.error(
                        "OffsetCommit failed for group %s due to group"
                        " error (%s), will rejoin", self.group_id, error)
                    self._subscription.mark_for_reassignment()
                    raise error
                else:
                    log.error(
                        "OffsetCommit failed for group %s on partition %s"
                        " with offset %s: %s", self.group_id, tp, offset,
                        error_type.__name__)
                    raise error_type()

        if unauthorized_topics:
            log.error("OffsetCommit failed for unauthorized topics %s",
                      unauthorized_topics)
            raise Errors.TopicAuthorizationFailedError(unauthorized_topics)
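
A hedged usage sketch (the coordinator instance and topic name are
illustrative):

    offsets = {TopicPartition('my-topic', 0): OffsetAndMetadata(42, '')}
    # Raises on failure; retriable cases raise too, and the caller is
    # expected to retry the commit.
    yield from coordinator.commit_offsets(offsets)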
Example #21
    def _proc_offset_request(self, node_id, topic_data):
        if self._client.api_version < (0, 10, 1):
            version = 0
            # Version 0 had another field `max_offsets`, set it to `1`
            for topic, part_data in topic_data.items():
                topic_data[topic] = [(part, ts, 1) for part, ts in part_data]
        else:
            version = 1
        request = OffsetRequest[version](-1, list(topic_data.items()))

        response = yield from self._client.send(node_id, request)

        res_offsets = {}
        for topic, part_data in response.topics:
            for part, error_code, *partition_info in part_data:
                partition = TopicPartition(topic, part)
                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    if response.API_VERSION == 0:
                        offsets = partition_info[0]
                        assert len(offsets) <= 1, \
                            'Expected OffsetResponse with one offset'
                        if offsets:
                            offset = offsets[0]
                            log.debug(
                                "Handling v0 ListOffsetResponse response for "
                                "%s. Fetched offset %s", partition, offset)
                            res_offsets[partition] = (offset, None)
                        else:
                            res_offsets[partition] = (UNKNOWN_OFFSET, None)
                    else:
                        timestamp, offset = partition_info
                        log.debug(
                            "Handling ListOffsetResponse response for "
                            "%s. Fetched offset %s, timestamp %s",
                            partition, offset, timestamp)
                        res_offsets[partition] = (offset, timestamp)
                elif error_type is Errors.UnsupportedForMessageFormatError:
                    # The broker-side message format predates 0.10.0;
                    # we simply put None in the response.
                    log.debug("Cannot search by timestamp for partition %s "
                              "because the message format version is before "
                              "0.10.0", partition)
                elif error_type is Errors.NotLeaderForPartitionError:
                    log.debug(
                        "Attempt to fetch offsets for partition %s ""failed "
                        "due to obsolete leadership information, retrying.",
                        partition)
                    raise error_type(partition)
                elif error_type is Errors.UnknownTopicOrPartitionError:
                    log.warning(
                        "Received unknown topic or partition error in "
                        "ListOffset request for partition %s. The "
                        "topic/partition may not exist or the user may not "
                        "have Describe access to it.", partition)
                    raise error_type(partition)
                else:
                    log.warning(
                        "Attempt to fetch offsets for partition %s failed due "
                        "to: %s", partition, error_type)
                    raise error_type(partition)
        return res_offsets
Example #22
    def _do_sasl_handshake(self):
        # NOTE: We only fall back to the v0.9 GSSAPI scheme if the user
        #       explicitly stated that api_version is "0.9"
        if self._version_hint and self._version_hint < (0, 10):
            handshake_klass = None
            assert self._sasl_mechanism == 'GSSAPI', (
                "Only GSSAPI supported for v0.9"
            )
        else:
            handshake_klass = self._version_info.pick_best(
                SaslHandShakeRequest)

            sasl_handshake = handshake_klass(self._sasl_mechanism)
            response = yield from self.send(sasl_handshake)
            error_type = Errors.for_code(response.error_code)
            if error_type is not Errors.NoError:
                error = error_type(self)
                self.close(reason=CloseReason.AUTH_FAILURE, exc=error)
                raise error

            if self._sasl_mechanism not in response.enabled_mechanisms:
                exc = Errors.UnsupportedSaslMechanismError(
                    'Kafka broker does not support %s sasl mechanism. '
                    'Enabled mechanisms are: %s'
                    % (self._sasl_mechanism, response.enabled_mechanisms))
                self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
                raise exc

        assert self._sasl_mechanism in ('PLAIN', 'GSSAPI')
        if self._security_protocol == 'SASL_PLAINTEXT' and \
           self._sasl_mechanism == 'PLAIN':
            self.log.warning(
                'Sending username and password in the clear')

        if self._sasl_mechanism == 'GSSAPI':
            authenticator = self.authenticator_gssapi()
        else:
            authenticator = self.authenticator_plain()

        if handshake_klass is not None and sasl_handshake.API_VERSION > 0:
            auth_klass = self._version_info.pick_best(SaslAuthenticateRequest)
        else:
            auth_klass = None

        auth_bytes = None
        expect_response = True

        while True:
            res = yield from authenticator.step(auth_bytes)
            if res is None:
                break
            payload, expect_response = res

            # Before Kafka 1.0.0 authentication bytes for SASL were sent
            # without a Kafka header, only with a length prefix. This made
            # error handling hard, so SaslAuthenticateRequest was added to
            # pass error messages back to clients properly.
            if auth_klass is None:
                auth_bytes = yield from self._send_sasl_token(payload,
                                                              expect_response)
            else:
                req = auth_klass(payload)
                resp = yield from self.send(req)
                error_type = Errors.for_code(resp.error_code)
                if error_type is not Errors.NoError:
                    exc = error_type(resp.error_message)
                    self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
                    raise exc
                auth_bytes = resp.sasl_auth_bytes

        if self._sasl_mechanism == 'GSSAPI':
            self.log.info(
                'Authenticated as %s via GSSAPI',
                self.sasl_principal)
        else:
            self.log.info('Authenticated as %s via PLAIN',
                          self._sasl_plain_username)
Example #23
    async def _do_sasl_handshake(self):
        # NOTE: We only fall back to the v0.9 GSSAPI scheme if the user
        #       explicitly stated that api_version is "0.9"
        if self._version_hint and self._version_hint < (0, 10):
            handshake_klass = None
            assert self._sasl_mechanism == 'GSSAPI', (
                "Only GSSAPI supported for v0.9")
        else:
            handshake_klass = self._version_info.pick_best(
                SaslHandShakeRequest)

            sasl_handshake = handshake_klass(self._sasl_mechanism)
            response = await self.send(sasl_handshake)
            error_type = Errors.for_code(response.error_code)
            if error_type is not Errors.NoError:
                error = error_type(self)
                self.close(reason=CloseReason.AUTH_FAILURE, exc=error)
                raise error

            if self._sasl_mechanism not in response.enabled_mechanisms:
                exc = Errors.UnsupportedSaslMechanismError(
                    'Kafka broker does not support %s sasl mechanism. '
                    'Enabled mechanisms are: %s' %
                    (self._sasl_mechanism, response.enabled_mechanisms))
                self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
                raise exc

        assert self._sasl_mechanism in ('PLAIN', 'GSSAPI', 'SCRAM-SHA-256',
                                        'SCRAM-SHA-512', 'OAUTHBEARER')
        if self._security_protocol == 'SASL_PLAINTEXT' and \
           self._sasl_mechanism == 'PLAIN':
            self.log.warning('Sending username and password in the clear')

        if self._sasl_mechanism == 'GSSAPI':
            authenticator = self.authenticator_gssapi()
        elif self._sasl_mechanism.startswith('SCRAM-SHA-'):
            authenticator = self.authenticator_scram()
        elif self._sasl_mechanism == 'OAUTHBEARER':
            authenticator = self.authenticator_oauth()
        else:
            authenticator = self.authenticator_plain()

        if handshake_klass is not None and sasl_handshake.API_VERSION > 0:
            auth_klass = self._version_info.pick_best(SaslAuthenticateRequest)
        else:
            auth_klass = None

        auth_bytes = None
        expect_response = True

        while True:
            res = await authenticator.step(auth_bytes)
            if res is None:
                break
            payload, expect_response = res

            # Before Kafka 1.0.0 authentication bytes for SASL were sent
            # without a Kafka header, only with a length prefix. This made
            # error handling hard, so SaslAuthenticateRequest was added to
            # pass error messages back to clients properly.
            if auth_klass is None:
                auth_bytes = await self._send_sasl_token(
                    payload, expect_response)
            else:
                req = auth_klass(payload)
                resp = await self.send(req)
                error_type = Errors.for_code(resp.error_code)
                if error_type is not Errors.NoError:
                    exc = error_type(resp.error_message)
                    self.close(reason=CloseReason.AUTH_FAILURE, exc=exc)
                    raise exc
                auth_bytes = resp.sasl_auth_bytes

        if self._sasl_mechanism == 'GSSAPI':
            self.log.info('Authenticated as %s via GSSAPI',
                          self.sasl_principal)
        elif self._sasl_mechanism == 'OAUTHBEARER':
            self.log.info('Authenticated via OAUTHBEARER')
        else:
            self.log.info('Authenticated as %s via PLAIN',
                          self._sasl_plain_username)
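
For brokers that predate SaslAuthenticateRequest, both handshake variants
fall back to _send_sasl_token. A hedged sketch of the legacy framing that
path is assumed to use: the opaque SASL token crosses the wire with only a
4-byte big-endian length prefix and no Kafka request header:

    import struct

    def frame_sasl_token(payload: bytes) -> bytes:
        # <length: int32, big-endian><token bytes>
        return struct.pack('>i', len(payload)) + payload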
Example #24
    def _proc_fetch_request(self, node_id, request):
        needs_wakeup = False
        needs_position_update = []
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        finally:
            self._in_flight.remove(node_id)

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, raw_batch in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # This can happen when a rebalance is in progress
                    log.debug(
                        "Ignoring fetched records for partition %s"
                        " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    tp_assignment = self._subscriptions.assignment[tp]
                    tp_assignment.highwater = highwater

                    # `drop_pending_message_set` is set after a seek to
                    # another position. Once we fetch the *new* position we
                    # clear the flag, so that future seeks are caught.
                    fetch_offset = fetch_offsets[tp]
                    if fetch_offset == tp_assignment.position:
                        tp_assignment.drop_pending_message_set = False

                    records = MemoryRecords(raw_batch)
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp,
                            fetch_offset)

                        message_iterator = self._unpack_records(tp, records)
                        self._records[tp] = FetchResult(
                            tp,
                            records=message_iterator,
                            subscriptions=self._subscriptions,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; surface this as an error to the consumer
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.", tp,
                            fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        needs_wakeup = True
                        self._subscriptions.assignment[tp].position += 1

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                        needs_position_update.append(tp)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)

        if needs_position_update:
            try:
                yield from self.update_fetch_positions(needs_position_update)
            except Exception:  # pragma: no cover
                log.error("Unexpected error updating fetch positions",
                          exc_info=True)

        return needs_wakeup
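All of these snippets lean on the same dispatch idiom: Errors.for_code() maps
a numeric broker error code to an exception class, which is then compared by
identity. A minimal sketch of that registry pattern (hypothetical classes,
not the real kafka errors module):

class BrokerResponseError(Exception):
    errno = -1
    retriable = False

class NoError(BrokerResponseError):
    errno = 0

class OffsetOutOfRange(BrokerResponseError):
    errno = 1

_CODE_TO_ERROR = {cls.errno: cls for cls in (NoError, OffsetOutOfRange)}

def for_code(error_code):
    # Unknown codes fall back to the generic base class
    return _CODE_TO_ERROR.get(error_code, BrokerResponseError)

assert for_code(0) is NoError
assert for_code(1) is OffsetOutOfRange
assert for_code(999) is BrokerResponseError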
Example #25
    def update_metadata(self, metadata):
        """Update cluster state given a MetadataResponse.

        Arguments:
            metadata (MetadataResponse): broker response to a metadata request

        Returns: None
        """

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")

        _new_brokers = {}
        for broker in metadata.brokers:
            if metadata.API_VERSION == 0:
                node_id, host, port = broker
                rack = None
            else:
                node_id, host, port, rack = broker
            _new_brokers[node_id] = BrokerMetadata(node_id, host, port, rack)

        if metadata.API_VERSION == 0:
            _new_controller = None
        else:
            _new_controller = _new_brokers.get(metadata.controller_id)

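        # Rebuild the partition indexes from scratch; they are swapped in
        # atomically under the lock below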
        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()
        _new_internal_topics = set()

        for topic_data in metadata.topics:
            if metadata.API_VERSION == 0:
                error_code, topic, partitions = topic_data
                is_internal = False
            else:
                error_code, topic, is_internal, partitions = topic_data
            if is_internal:
                _new_internal_topics.add(topic)
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic, partition=partition, leader=leader,
                        replicas=replicas, isr=isr, error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))

            elif error_type is Errors.LeaderNotAvailableError:
                log.warning("Topic %s is not available during auto-create"
                            " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s",
                          topic, error_type)

        with self._lock:
            self._brokers = _new_brokers
            self.controller = _new_controller
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            self.internal_topics = _new_internal_topics

        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now

        log.debug("Updated cluster metadata to %s", self)

        for listener in self._listeners:
            listener(self)
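The version branching above implies two tuple layouts. A minimal sketch of
the v0 shapes update_metadata() consumes, with SimpleNamespace standing in
for the real MetadataResponse class (the topic name is made up):

from types import SimpleNamespace

fake_v0_response = SimpleNamespace(
    API_VERSION=0,
    # v0 brokers: (node_id, host, port); v1+ appends a rack field
    brokers=[(0, 'localhost', 9092)],
    # v0 topics: (error_code, topic, partitions); v1+ inserts is_internal.
    # Each partition: (error_code, partition, leader, replicas, isr)
    topics=[(0, 'my-topic', [(0, 0, 0, [0], [0])])],
)
# Feeding this to update_metadata() would register broker 0 as the leader
# of my-topic partition 0.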
Example #26
    def _send_produce_req(self, node_id, batches):
        """Create produce request to node
        If producer configured with `retries`>0 and produce response contain
        "failed" partitions produce request for this partition will try
        resend to broker `retries` times with `retry_timeout_ms` timeouts.

        Arguments:
            node_id (int): kafka broker identifier
            batches (dict): dictionary of {TopicPartition: MessageBatch}
        """
        self._in_flight.add(node_id)
        t0 = self._loop.time()
        while True:
            topics = collections.defaultdict(list)
            for tp, batch in batches.items():
                topics[tp.topic].append((tp.partition, batch.data()))

            if self.client.api_version >= (0, 10):
                version = 2
            elif self.client.api_version == (0, 9):
                version = 1
            else:
                version = 0

            request = ProduceRequest[version](
                required_acks=self._acks,
                timeout=self._request_timeout_ms,
                topics=list(topics.items()))

            try:
                response = yield from self.client.send(node_id, request)
            except KafkaError as err:
                for batch in batches.values():
                    if not err.retriable or batch.expired():
                        batch.done(exception=err)
                log.warning(
                    "Got error produce response: %s", err)
                if not err.retriable:
                    break
            else:
                if response is None:
                    # acks=0, so no response is expected; mark batches done
                    for batch in batches.values():
                        batch.done()
                    break

                for topic, partitions in response.topics:
                    for partition_info in partitions:
                        if response.API_VERSION < 2:
                            partition, error_code, offset = partition_info
                        else:
                            partition, error_code, offset, _ = partition_info
                        tp = TopicPartition(topic, partition)
                        error = Errors.for_code(error_code)
                        batch = batches.pop(tp, None)
                        if batch is None:
                            continue

                        if error is Errors.NoError:
                            batch.done(offset)
                        elif not getattr(error, 'retriable', False) or \
                                batch.expired():
                            batch.done(exception=error())
                        else:
                            # Ok, we can retry this batch
                            batches[tp] = batch
                            log.warning(
                                "Got error produce response on topic-partition"
                                " %s, retrying. Error: %s", tp, error)

            if batches:
                yield from asyncio.sleep(
                    self._retry_backoff, loop=self._loop)
            else:
                break

        # If the batches for this node were processed in less than the linger
        # time, wait out the remaining time
        sleep_time = self._linger_time - (self._loop.time() - t0)
        if sleep_time > 0:
            yield from asyncio.sleep(sleep_time, loop=self._loop)

        self._in_flight.remove(node_id)
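The per-partition branch above reduces to a single rule: a batch is retried
only when the error is retriable and the batch has not expired; otherwise it
is completed with the exception. A minimal standalone restatement (names are
illustrative):

def should_retry(error_type, batch_expired):
    # Inverse of the `not getattr(error, 'retriable', False) or
    # batch.expired()` branch above
    return getattr(error_type, 'retriable', False) and not batch_expired

class FakeRetriableError(Exception):
    retriable = True

assert should_retry(FakeRetriableError, batch_expired=False)
assert not should_retry(FakeRetriableError, batch_expired=True)
assert not should_retry(ValueError, batch_expired=False)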