Example #1
0
 def _are_subscriptions_identical(self):
     """Report whether partitions and consumers all share the same candidates.

     Returns:
         ``False`` when ``has_identical_list_elements`` rejects the values of
         ``partition_to_all_potential_consumers``; otherwise the result of
         ``has_identical_list_elements`` for the values of
         ``consumer_to_all_potential_partitions``.
     """
     consumers_per_partition = list(
         six.itervalues(self.partition_to_all_potential_consumers))
     if has_identical_list_elements(consumers_per_partition):
         # Only look at the consumer side once the partition side agrees
         partitions_per_consumer = list(
             six.itervalues(self.consumer_to_all_potential_partitions))
         return has_identical_list_elements(partitions_per_consumer)
     return False
 def consumer_thread(i):
     """Run consumer number ``i`` until its stop event is set.

     Relies on ``consumers``, ``stop``, ``messages``, ``topic``,
     ``connect_str`` and ``group_id`` from the surrounding scope (not
     shown in this block).

     Arguments:
         i: key under which this thread stores its consumer, its stop event
             and the records it receives.
     """
     assert i not in consumers
     assert i not in stop
     stop[i] = threading.Event()
     consumers[i] = KafkaConsumer(topic,
                                  bootstrap_servers=connect_str,
                                  group_id=group_id,
                                  heartbeat_interval_ms=500)
     while not stop[i].is_set():
         # poll() hands back a mapping of partition -> list of records, so the
         # (partition, records) pairs come from its items, not its values.
         for tp, records in six.iteritems(consumers[i].poll(100)):
             messages[i][tp].extend(records)
     consumers[i].close()
     # Leave explicit markers behind so observers can tell the thread finished
     consumers[i] = None
     stop[i] = None
Example #3
0
 def consumer_thread(i):
     """Run consumer number ``i`` until its stop event is set.

     Relies on ``consumers``, ``stop``, ``messages``, ``topic``,
     ``connect_str`` and ``group_id`` from the surrounding scope (not
     shown in this block).

     Arguments:
         i: key under which this thread stores its consumer, its stop event
             and the records it receives.
     """
     assert i not in consumers
     assert i not in stop
     stop[i] = threading.Event()
     consumers[i] = KafkaConsumer(topic,
                                  bootstrap_servers=connect_str,
                                  group_id=group_id,
                                  heartbeat_interval_ms=500)
     while not stop[i].is_set():
         # poll() hands back a mapping of partition -> list of records, so the
         # (partition, records) pairs come from its items, not its values.
         for tp, records in six.iteritems(consumers[i].poll(100)):
             messages[i][tp].extend(records)
     consumers[i].close()
     # Leave explicit markers behind so observers can tell the thread finished
     consumers[i] = None
     stop[i] = None
Example #4
0
    def assign(cls, cluster, member_metadata):
        """Give each partition number, across every topic, to a single member.

        All members must subscribe to the same topics and all topics must
        expose the same partition numbers. Partition numbers are dealt
        round-robin over the sorted member ids, and a member receives its
        partition numbers for every topic.

        Arguments:
            cluster: object providing ``partitions_for_topic(topic)``.
            member_metadata: mapping of member id to an object with a
                ``subscription`` attribute (an iterable of topic names).

        Returns:
            dict: member id -> ``ConsumerProtocolMemberAssignment`` built from
            ``cls.version``, the sorted ``(topic, partitions)`` pairs and
            ``b''``. Empty when ``member_metadata`` is empty.

        Raises:
            UnmergeableTopcis: if members subscribe to different topics, a
                topic has no partition metadata, or topics do not share the
                same partition numbers.
        """
        # Without members there is nothing to assign; the checks below would
        # otherwise end up iterating ``None``.
        if not member_metadata:
            return {}

        # get all topics and check every member has the same
        all_topics = None
        for metadata in six.itervalues(member_metadata):
            subscription = set(metadata.subscription)
            if all_topics is None:
                all_topics = subscription
            elif all_topics != subscription:
                diff = all_topics.symmetric_difference(subscription)
                # NOTE(review): message and argument are passed separately;
                # presumably UnmergeableTopcis formats them - confirm.
                raise UnmergeableTopcis(
                    'Topic(s) %s do not appear in all members',
                    ', '.join(diff))

        # get all partition numbers and check every topic has the same
        all_partitions = None
        for topic in all_topics:
            partitions = cluster.partitions_for_topic(topic)
            if partitions is None:
                raise UnmergeableTopcis('No partition metadata for topic %s',
                                        topic)
            if all_partitions is None:
                all_partitions = set(partitions)
            elif all_partitions != set(partitions):
                diff = all_partitions.symmetric_difference(partitions)
                raise UnmergeableTopcis(
                    'Partition(s) %s do not appear in all topics',
                    ', '.join(str(p) for p in diff))
        # all_partitions is still None when the members subscribe to no topic
        # at all; treat that as "no partitions" instead of sorting None.
        all_partitions = sorted(all_partitions or ())

        assignment = collections.defaultdict(
            lambda: collections.defaultdict(list))
        # round robin assignation of the partition numbers
        member_iter = itertools.cycle(sorted(member_metadata.keys()))
        for partition in all_partitions:
            member_id = next(member_iter)
            for topic in all_topics:
                assignment[member_id][topic].append(partition)

        # Every member gets an entry, even when it ended up with no partitions
        protocol_assignment = {}
        for member_id in member_metadata:
            protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
                cls.version, sorted(assignment[member_id].items()), b'')
        return protocol_assignment
Example #5
0
    def assign(cls, cluster, member_metadata):
        """Deal every subscribed topic-partition round-robin over the members.

        Arguments:
            cluster: object providing ``partitions_for_topic(topic)``.
            member_metadata: mapping of member id to an object with a
                ``subscription`` attribute listing topic names.

        Returns:
            dict: member id -> ``ConsumerProtocolMemberAssignment`` built from
            ``cls.version``, the sorted ``(topic, partitions)`` pairs and
            ``b''``.
        """
        subscribed_topics = set()
        for member in six.itervalues(member_metadata):
            subscribed_topics.update(member.subscription)

        # Topics without partition metadata are logged and left out
        pending = []
        for topic in subscribed_topics:
            partition_ids = cluster.partitions_for_topic(topic)
            if partition_ids is None:
                log.warning('No partition metadata for topic %s', topic)
                continue
            pending.extend(
                TopicPartition(topic, partition_id)
                for partition_id in partition_ids)
        pending.sort()

        # Shape: {member_id: {topic: [partition, ...]}}
        assignment = collections.defaultdict(
            lambda: collections.defaultdict(list))

        candidates = itertools.cycle(sorted(member_metadata.keys()))
        for tp in pending:
            # Take the next member in the cycle that subscribes to this topic.
            # Every topic here came from some member's subscription, so a
            # match always exists; without one this search would never end.
            owner = next(
                member_id for member_id in candidates
                if tp.topic in member_metadata[member_id].subscription)
            assignment[owner][tp.topic].append(tp.partition)

        protocol_assignment = {}
        for member_id in member_metadata:
            topic_items = sorted(assignment[member_id].items())
            protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
                cls.version, topic_items, b'')
        return protocol_assignment
Example #6
0
    def assign(cls, cluster, member_metadata):
        """Spread all subscribed topic-partitions over the members in turn.

        Arguments:
            cluster: object providing ``partitions_for_topic(topic)``.
            member_metadata: mapping of member id to an object with a
                ``subscription`` attribute listing topic names.

        Returns:
            dict: member id -> ``ConsumerProtocolMemberAssignment`` built from
            ``cls.version``, the sorted ``(topic, partitions)`` pairs and
            ``b''``.
        """
        # Union of the topics any member subscribes to
        topics = set()
        for info in six.itervalues(member_metadata):
            topics.update(info.subscription)

        topic_partitions = []
        for name in topics:
            numbers = cluster.partitions_for_topic(name)
            if numbers is None:
                # Unknown topics are reported and skipped
                log.warning('No partition metadata for topic %s', name)
                continue
            for number in numbers:
                topic_partitions.append(TopicPartition(name, number))
        topic_partitions.sort()

        # construct {member_id: {topic: [partition, ...]}}
        assignment = collections.defaultdict(lambda: collections.defaultdict(list))

        rotation = itertools.cycle(sorted(member_metadata.keys()))
        for item in topic_partitions:
            # Keep rotating until a member subscribed to the topic comes up.
            # Since every topic was taken from a member subscription, one is
            # always found; otherwise this loop would not terminate.
            while True:
                chosen = next(rotation)
                if item.topic in member_metadata[chosen].subscription:
                    break
            assignment[chosen][item.topic].append(item.partition)

        protocol_assignment = {}
        for member_id in member_metadata:
            per_topic = sorted(assignment[member_id].items())
            protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
                cls.version, per_topic, b'')
        return protocol_assignment
    def _poll(self, timeout, sleep=True):
        """Wait once on the selector and gather the responses that arrived.

        Arguments:
            timeout: passed unchanged to ``self._selector.select``.
            sleep: only consulted by the sanity assertion at the top.

        Returns:
            list: every truthy value ``conn.recv()`` produced during this call.
        """
        # There has to be something to wait for unless sleeping was requested
        assert self.in_flight_request_count() > 0 or self._connecting or sleep

        collected = []
        handled = set()

        select_began = time.time()
        ready_keys = self._selector.select(timeout)
        select_ended = time.time()
        if self._sensors:
            # time.time() is in seconds; scale the duration to nanoseconds
            waited = select_ended - select_began
            self._sensors.select_time.record(waited * 1000000000)

        for key, events in ready_keys:
            # The wake-up descriptor only has to be cleared
            if key.fileobj is self._wake_r:
                self._clear_wake_fd()
                continue
            if not (events & selectors.EVENT_READ):
                continue

            conn = key.data
            handled.add(conn)

            if not conn.in_flight_requests:
                # Readable although nothing is outstanding. Either the remote
                # end closed the connection (it died, a firewall timed out,
                # ...) or the protocol is out of sync. In both cases the
                # connection cannot be used safely any more: peek one byte to
                # tell the cases apart for the log, then close it.
                try:
                    stray = key.fileobj.recv(1)
                    if stray:  # a non-empty read means unexpected protocol data
                        log.warning('Protocol out of sync on %r, closing', conn)
                except socket.error:
                    pass
                conn.close()
                continue

            # Take as many complete responses as the connection has pending
            while conn.in_flight_requests:
                received = conn.recv()  # Note: conn.recv runs callbacks / errbacks
                if not received:
                    # Partial response: it stays buffered inside the connection
                    # and the request remains in conn.in_flight_requests
                    break
                collected.append(received)

        # SSL connections may hold pending bytes that select() does not report
        if self.config['security_protocol'] in ('SSL', 'SASL_SSL'):
            # TODO: optimize
            for conn in self._conns.values():
                if conn in handled or not conn.connected() or not conn._sock.pending():
                    continue
                received = conn.recv()
                if received:
                    collected.append(received)

        # Close every connection whose requests have timed out
        for conn in six.itervalues(self._conns):
            if not conn.requests_timed_out():
                continue
            log.warning('%s timed out after %s ms. Closing connection.',
                        conn, conn.config['request_timeout_ms'])
            timeout_error = Errors.RequestTimedOutError(
                'Request timed out after %s ms' %
                conn.config['request_timeout_ms'])
            conn.close(error=timeout_error)

        if self._sensors:
            self._sensors.io_time.record((time.time() - select_ended) * 1000000000)
        return collected
Example #8
0
    def _poll(self, timeout):
        """Wait once on the selector and queue what the ready connections received.

        Everything ``conn.recv()`` returns is appended to
        ``self._pending_completion``; the method itself returns nothing.

        Arguments:
            timeout: passed unchanged to ``self._selector.select``.
        """
        handled = set()

        select_began = time.time()
        ready_keys = self._selector.select(timeout)
        select_ended = time.time()
        if self._sensors:
            # time.time() is in seconds; scale the duration to nanoseconds
            waited = select_ended - select_began
            self._sensors.select_time.record(waited * 1000000000)

        for key, events in ready_keys:
            # The wake-up descriptor only has to be cleared
            if key.fileobj is self._wake_r:
                self._clear_wake_fd()
                continue
            if not (events & selectors.EVENT_READ):
                continue

            conn = key.data
            handled.add(conn)

            if not conn.in_flight_requests:
                # Readable although nothing is outstanding. Either the remote
                # end closed the connection (it died, a firewall timed out,
                # ...) or the protocol is out of sync. In both cases the
                # connection cannot be used safely any more: peek one byte to
                # tell the cases apart for the log, then close it.
                try:
                    stray = key.fileobj.recv(1)
                    if stray:  # a non-empty read means unexpected protocol data
                        log.warning('Protocol out of sync on %r, closing',
                                    conn)
                except socket.error:
                    pass
                close_reason = Errors.ConnectionError(
                    'Socket EVENT_READ without in-flight-requests')
                conn.close(close_reason)
                continue

            # Record activity for this node, then queue what it received
            self._idle_expiry_manager.update(conn.node_id)
            self._pending_completion.extend(conn.recv())

        # SSL connections may hold pending bytes that select() does not report
        if self.config['security_protocol'] in ('SSL', 'SASL_SSL'):
            # TODO: optimize
            for conn in self._conns.values():
                if conn in handled or not conn.connected() or not conn._sock.pending():
                    continue
                self._pending_completion.extend(conn.recv())

        # Close every connection whose requests have timed out
        for conn in six.itervalues(self._conns):
            if not conn.requests_timed_out():
                continue
            log.warning('%s timed out after %s ms. Closing connection.',
                        conn, conn.config['request_timeout_ms'])
            timeout_error = Errors.RequestTimedOutError(
                'Request timed out after %s ms' %
                conn.config['request_timeout_ms'])
            conn.close(error=timeout_error)

        if self._sensors:
            self._sensors.io_time.record(
                (time.time() - select_ended) * 1000000000)

        self._maybe_close_oldest_connection()
Example #9
0
    def _poll(self, timeout, sleep=True):
        """Wait once on the selector and gather the responses that arrived.

        Arguments:
            timeout: passed unchanged to ``self._selector.select``.
            sleep: only consulted by the sanity assertion at the top.

        Returns:
            list: every truthy value ``conn.recv()`` produced during this call.
        """
        # There has to be something to wait for unless sleeping was requested
        assert self.in_flight_request_count() > 0 or self._connecting or sleep

        collected = []
        handled = set()

        select_began = time.time()
        ready_keys = self._selector.select(timeout)
        select_ended = time.time()
        if self._sensors:
            # time.time() is in seconds; scale the duration to nanoseconds
            waited = select_ended - select_began
            self._sensors.select_time.record(waited * 1000000000)

        for key, events in ready_keys:
            # The wake-up descriptor only has to be cleared
            if key.fileobj is self._wake_r:
                self._clear_wake_fd()
                continue
            if not (events & selectors.EVENT_READ):
                continue

            conn = key.data
            handled.add(conn)

            if not conn.in_flight_requests:
                # Readable although nothing is outstanding. Either the remote
                # end closed the connection (it died, a firewall timed out,
                # ...) or the protocol is out of sync. In both cases the
                # connection cannot be used safely any more: peek one byte to
                # tell the cases apart for the log, then close it.
                try:
                    stray = key.fileobj.recv(1)
                    if stray:  # a non-empty read means unexpected protocol data
                        log.warning('Protocol out of sync on %r, closing', conn)
                except socket.error:
                    pass
                close_reason = Errors.ConnectionError(
                    'Socket EVENT_READ without in-flight-requests')
                conn.close(close_reason)
                continue

            # Record activity for this node before reading from it
            self._idle_expiry_manager.update(conn.node_id)

            # Take as many complete responses as the connection has pending
            while conn.in_flight_requests:
                received = conn.recv()  # Note: conn.recv runs callbacks / errbacks
                if not received:
                    # Partial response: it stays buffered inside the connection
                    # and the request remains in conn.in_flight_requests
                    break
                collected.append(received)

        # SSL connections may hold pending bytes that select() does not report
        if self.config['security_protocol'] in ('SSL', 'SASL_SSL'):
            # TODO: optimize
            for conn in self._conns.values():
                if conn in handled or not conn.connected() or not conn._sock.pending():
                    continue
                received = conn.recv()
                if received:
                    collected.append(received)

        # Close every connection whose requests have timed out
        for conn in six.itervalues(self._conns):
            if not conn.requests_timed_out():
                continue
            log.warning('%s timed out after %s ms. Closing connection.',
                        conn, conn.config['request_timeout_ms'])
            timeout_error = Errors.RequestTimedOutError(
                'Request timed out after %s ms' %
                conn.config['request_timeout_ms'])
            conn.close(error=timeout_error)

        if self._sensors:
            self._sensors.io_time.record((time.time() - select_ended) * 1000000000)
        self._maybe_close_oldest_connection()
        return collected
Example #10
0
    def run_once(self):
        """Perform one send iteration: drain ready batches, send them, poll."""
        # Hand any newly requested topics over to the client first
        while self._topics_to_add:
            self._client.add_topic(self._topics_to_add.pop())

        # Which nodes have data ready, when to check again, and whether any
        # partition leader is unknown
        ready_nodes, next_ready_check_delay, unknown_leaders_exist = (
            self._accumulator.ready(self._metadata))

        # Unknown partition leaders call for a metadata refresh
        if unknown_leaders_exist:
            log.debug('Unknown leaders exist, requesting metadata update')
            self._metadata.request_update()

        # Drop the nodes the client cannot send to yet, remembering the
        # shortest connection delay among them
        not_ready_timeout = 999999999
        for node in list(ready_nodes):
            if self._client.ready(node):
                continue
            log.debug('Node %s not ready; delaying produce of accumulated batch', node)
            ready_nodes.remove(node)
            delay = self._client.connection_delay(node)
            not_ready_timeout = min(not_ready_timeout, delay)

        # Build the per-node batches to produce
        max_request_size = self.config['max_request_size']
        batches_by_node = self._accumulator.drain(
            self._metadata, ready_nodes, max_request_size)

        if self.config['guarantee_message_order']:
            # Mute every partition that was just drained
            for node_batches in six.itervalues(batches_by_node):
                for batch in node_batches:
                    self._accumulator.muted.add(batch.topic_partition)

        # Count batches that expired while waiting as errors
        expired = self._accumulator.abort_expired_batches(
            self.config['request_timeout_ms'], self._metadata)
        for stale in expired:
            self._sensors.record_errors(stale.topic_partition.topic, stale.record_count)

        self._sensors.update_produce_request_metrics(batches_by_node)
        requests = self._create_produce_requests(batches_by_node)

        # With nodes that are ready and have sendable data, poll with a zero
        # timeout so the caller can loop straight away and send more.
        # Otherwise wait as long as the nodes whose data is not sendable yet
        # (lingering, backing off) allow. Nodes with sendable data that are
        # not ready are deliberately excluded, since they would cause busy
        # looping.
        poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout)
        if ready_nodes:
            log.debug("Nodes with data ready to send: %s", ready_nodes) # trace
            log.debug("Created %d produce requests: %s", len(requests), requests) # trace
            poll_timeout_ms = 0

        for node_id, request in six.iteritems(requests):
            batches = batches_by_node[node_id]
            log.debug('Sending Produce Request: %r', request)
            sent = self._client.send(node_id, request)
            with_callback = sent.add_callback(
                self._handle_produce_response, node_id, time.time(), batches)
            with_callback.add_errback(self._failed_produce, batches, node_id)

        # The select time is 0 when partitions are ready to send; otherwise it
        # lasts until accumulated data reaches its linger expiry, or else
        # until the metadata expires
        self._client.poll(poll_timeout_ms, sleep=True)
Example #11
0
    def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
        """
        Group a list of request payloads by topic+partition and send them to
        the leader broker for that partition using the supplied encode/decode
        functions

        Arguments:

        payloads: list of object-like entities with a topic (str) and
            partition (int) attribute; payloads with duplicate topic-partitions
            are not supported.

        encoder_fn: a method to encode the list of payloads to a request;
            it is called with the broker's payloads as the ``payloads``
            keyword argument

        decoder_fn: a method to decode a response into response objects.
            The response objects must be object-like and have topic
            and partition attributes. Pass None when no response is
            expected from the server.

        Returns:

        List with one entry per supplied payload, in the same order: the
        decoded response object, a FailedPayloadsError for payloads that
        could not be sent or answered, or None when decoder_fn is None
        """
        # encoders / decoders do not maintain ordering currently
        # so we need to keep this so we can rebuild order before returning
        original_ordering = [(p.topic, p.partition) for p in payloads]

        # Connection errors generally mean stale metadata
        # although sometimes it means incorrect api request
        # Unfortunately there is no good way to tell the difference
        # so we'll just reset metadata on all errors to be safe
        refresh_metadata = False

        # For each broker, send the list of request payloads
        # and collect the responses and errors
        payloads_by_broker = self._payloads_by_broker(payloads)
        responses = {}

        def failed_payloads(failed):
            # Record a FailedPayloadsError as the response of each payload
            for payload in failed:
                topic_partition = (str(payload.topic), payload.partition)
                responses[topic_partition] = FailedPayloadsError(payload)

        futures_by_connection = {}
        selector = selectors.DefaultSelector()

        # The selector is closed in ``finally`` so it is released even when
        # sending or decoding raises
        try:
            for broker, broker_payloads in six.iteritems(payloads_by_broker):
                if broker is None:
                    failed_payloads(broker_payloads)
                    continue

                host, port, afi = get_ip_port_afi(broker.host)
                try:
                    conn = self._get_conn(host, broker.port, afi, broker.nodeId)
                except ConnectionError:
                    refresh_metadata = True
                    failed_payloads(broker_payloads)
                    continue

                request = encoder_fn(payloads=broker_payloads)
                # decoder_fn=None signal that the server is expected to not
                # send a response.  This probably only applies to
                # ProduceRequest w/ acks = 0
                expect_response = (decoder_fn is not None)
                registered_sock = None
                if expect_response:
                    # Remember the exact object that was registered so it can
                    # be unregistered whatever happens to conn._sock later
                    registered_sock = conn._sock
                    selector.register(registered_sock, selectors.EVENT_READ,
                                      conn)
                future = conn.send(request, expect_response=expect_response)

                if future.failed():
                    log.error("Request failed: %s", future.exception)
                    # Only unregister what was registered; unregistering an
                    # unknown file object raises KeyError
                    if expect_response:
                        selector.unregister(registered_sock)
                    refresh_metadata = True
                    failed_payloads(broker_payloads)
                    continue

                if not expect_response:
                    for payload in broker_payloads:
                        topic_partition = (str(payload.topic),
                                           payload.partition)
                        responses[topic_partition] = None
                    continue

                futures_by_connection[conn] = (future, broker)

            timeout = self.timeout
            while futures_by_connection:
                start_time = time.time()

                ready = selector.select(timeout)

                for key, _ in ready:

                    conn = key.data
                    future, _ = futures_by_connection[conn]
                    while not future.is_done:
                        conn.recv()
                    _, broker = futures_by_connection.pop(conn)
                    # This connection is finished; stop watching it so a later
                    # readable event cannot look up a future that was already
                    # removed
                    selector.unregister(key.fileobj)

                    if future.failed():
                        log.error("Request failed: %s", future.exception)
                        refresh_metadata = True
                        failed_payloads(payloads_by_broker[broker])

                    else:
                        for payload_response in decoder_fn(future.value):
                            topic_partition = (str(payload_response.topic),
                                               payload_response.partition)
                            responses[topic_partition] = payload_response

                # Charge the time spent in this round against the budget
                timeout -= time.time() - start_time
                if timeout < 0:
                    log.error("%s requests timed out.",
                              len(futures_by_connection))
                    for _, broker in six.itervalues(futures_by_connection):
                        failed_payloads(payloads_by_broker[broker])
                        refresh_metadata = True
                    break
        finally:
            selector.close()

        if refresh_metadata:
            self.reset_all_metadata()

        # Return responses in the same order as provided
        return [responses[tp] for tp in original_ordering]
Example #12
0
    def _poll(self, timeout):
        """Flush pending sends, wait once on the selector and queue responses.

        Everything ``conn.recv()`` returns is appended to
        ``self._pending_completion``; the method itself returns nothing.

        Arguments:
            timeout: passed unchanged to ``self._selector.select``.
        """
        # This needs to be locked, but since it is only called from within the
        # locked section of poll(), there is no additional lock acquisition here
        handled = set()

        # Pending requests go out before any response is awaited
        self._register_send_sockets()

        select_began = time.time()
        ready_keys = self._selector.select(timeout)
        select_ended = time.time()
        if self._sensors:
            # time.time() is in seconds; scale the duration to nanoseconds
            waited = select_ended - select_began
            self._sensors.select_time.record(waited * 1000000000)

        for key, events in ready_keys:
            # The wake-up descriptor only has to be cleared
            if key.fileobj is self._wake_r:
                self._clear_wake_fd()
                continue

            # Writable socket: finish connecting or push pending requests
            if events & selectors.EVENT_WRITE:
                conn = key.data
                if conn.connecting():
                    conn.connect()
                elif conn.send_pending_requests_v2():
                    # The send completed, so write readiness is no longer of
                    # interest for this socket: keep whatever other events
                    # remain, or drop the registration when none are left
                    remaining = key.events ^ selectors.EVENT_WRITE
                    if remaining:
                        self._selector.modify(key.fileobj, remaining, key.data)
                    else:
                        self._selector.unregister(key.fileobj)

            if not (events & selectors.EVENT_READ):
                continue
            conn = key.data
            handled.add(conn)

            if not conn.in_flight_requests:
                # Readable although nothing is outstanding. Either the remote
                # end closed the connection (it died, a firewall timed out,
                # ...) or the protocol is out of sync. In both cases the
                # connection cannot be used safely any more: peek one byte to
                # tell the cases apart for the log, then close it.
                try:
                    stray = key.fileobj.recv(1)
                    if stray:  # a non-empty read means unexpected protocol data
                        log.warning('Protocol out of sync on %r, closing',
                                    conn)
                except socket.error:
                    pass
                close_reason = Errors.KafkaConnectionError(
                    'Socket EVENT_READ without in-flight-requests')
                conn.close(close_reason)
                continue

            # Record activity for this node, then queue what it received
            self._idle_expiry_manager.update(conn.node_id)
            self._pending_completion.extend(conn.recv())

        # SSL connections may hold pending bytes that select() does not report
        if self.config['security_protocol'] in ('SSL', 'SASL_SSL'):
            # TODO: optimize
            for conn in self._conns.values():
                if conn in handled or not conn.connected() or not conn._sock.pending():
                    continue
                self._pending_completion.extend(conn.recv())

        # Close every connection whose requests have timed out
        for conn in six.itervalues(self._conns):
            if not conn.requests_timed_out():
                continue
            log.warning('%s timed out after %s ms. Closing connection.',
                        conn, conn.config['request_timeout_ms'])
            timeout_error = Errors.RequestTimedOutError(
                'Request timed out after %s ms' %
                conn.config['request_timeout_ms'])
            conn.close(error=timeout_error)

        if self._sensors:
            self._sensors.io_time.record(
                (time.time() - select_ended) * 1000000000)

        self._maybe_close_oldest_connection()
Example #13
0
    def _poll(self, timeout):
        """Wait once on the selector and queue what the ready connections received.

        Everything ``conn.recv()`` returns is appended to
        ``self._pending_completion``; the method itself returns nothing.

        Arguments:
            timeout: passed unchanged to ``self._selector.select``.
        """
        handled = set()

        select_began = time.time()
        ready_keys = self._selector.select(timeout)
        select_ended = time.time()
        if self._sensors:
            # time.time() is in seconds; scale the duration to nanoseconds
            waited = select_ended - select_began
            self._sensors.select_time.record(waited * 1000000000)

        for key, events in ready_keys:
            # The wake-up descriptor only has to be cleared
            if key.fileobj is self._wake_r:
                self._clear_wake_fd()
                continue
            if not (events & selectors.EVENT_READ):
                continue

            conn = key.data
            handled.add(conn)

            if not conn.in_flight_requests:
                # Readable although nothing is outstanding. Either the remote
                # end closed the connection (it died, a firewall timed out,
                # ...) or the protocol is out of sync. In both cases the
                # connection cannot be used safely any more: peek one byte to
                # tell the cases apart for the log, then close it.
                try:
                    stray = key.fileobj.recv(1)
                    if stray:  # a non-empty read means unexpected protocol data
                        log.warning('Protocol out of sync on %r, closing', conn)
                except socket.error:
                    pass
                close_reason = Errors.KafkaConnectionError(
                    'Socket EVENT_READ without in-flight-requests')
                conn.close(close_reason)
                continue

            # Record activity for this node, then queue what it received
            self._idle_expiry_manager.update(conn.node_id)
            self._pending_completion.extend(conn.recv())

        # SSL connections may hold pending bytes that select() does not report
        if self.config['security_protocol'] in ('SSL', 'SASL_SSL'):
            # TODO: optimize
            for conn in self._conns.values():
                if conn in handled or not conn.connected() or not conn._sock.pending():
                    continue
                self._pending_completion.extend(conn.recv())

        # Close every connection whose requests have timed out
        for conn in six.itervalues(self._conns):
            if not conn.requests_timed_out():
                continue
            log.warning('%s timed out after %s ms. Closing connection.',
                        conn, conn.config['request_timeout_ms'])
            timeout_error = Errors.RequestTimedOutError(
                'Request timed out after %s ms' %
                conn.config['request_timeout_ms'])
            conn.close(error=timeout_error)

        if self._sensors:
            self._sensors.io_time.record((time.time() - select_ended) * 1000000000)

        self._maybe_close_oldest_connection()