def flush(self):
    """
    Transforms the ``unsent`` structure to produce requests and sends them.

    The first order of business is to order the pending messages in
    ``unsent`` based on partition leader.  If a message's partition leader
    is not a known broker, the message is queued up to be retried and the
    flag denoting that a cluster ``heal()`` call is needed is set.

    Once the legitimate messages are ordered, instances of ProduceRequest
    are created for each broker and sent.
    """
    # Nothing pending -> nothing to do.
    if not self.unsent:
        return
    # leader -> topic -> partition -> message list
    ordered = collections.defaultdict(lambda: collections.defaultdict(
        lambda: collections.defaultdict(list)))
    # topic -> messages whose leader is currently unknown.
    to_retry = collections.defaultdict(list)
    for topic, msgs in drain(self.unsent):
        for msg in msgs:
            # Pick a partition for this message, then resolve its leader.
            partition = self.partitioner(msg.key, self.cluster.topics[topic])
            leader = self.cluster.get_leader(topic, partition)
            if leader not in self.cluster:
                # Leader is not a broker the cluster currently knows about;
                # defer the message rather than dropping it.
                to_retry[topic].append(msg)
                continue
            ordered[leader][topic][partition].append(msg)
    # One ProduceRequest per leader, populated topic by topic.
    requests = {}
    for leader, topics in six.iteritems(ordered):
        requests[leader] = produce_api.ProduceRequest(
            required_acks=self.required_acks,
            timeout=self.ack_timeout, topics=[])
        for topic, partitions in six.iteritems(topics):
            requests[leader].topics.append(
                produce_api.TopicRequest(name=topic, partitions=[]))
            for partition_id, msgs in six.iteritems(partitions):
                requests[leader].topics[-1].partitions.append(
                    produce_api.PartitionRequest(
                        partition_id=partition_id,
                        message_set=messages.MessageSet.compressed(
                            self.compression, msgs)))
                # Record what was batched under the request's correlation id
                # so responses/errors can be matched back to these messages.
                self.sent[requests[leader].
                          correlation_id][topic][partition_id] = msgs
    for topic, msgs in six.iteritems(to_retry):
        self.queue_retries(topic, msgs)
    yield self.send(requests)
def test_leader_not_present_at_first(self):
    """
    A message produced while the topic's leader is an unknown broker is
    retried and delivered once a heal() surfaces a valid leader.
    """
    # Topic initially led by broker 7, which is not a known broker.
    self.add_topic("test.topic", leaders=(7,))

    prod = producer.Producer(["kafka01"], batch_size=1)
    yield prod.produce("test.topic", "foo")

    # Re-home the topic on broker 1 and refresh the cluster view.
    self.add_topic("test.topic", leaders=(1,))
    yield prod.cluster.heal()
    yield prod.produce("test.topic", "bar")

    # Both the retried "foo" and the fresh "bar" end up in one batch.
    expected_msgs = [
        messages.Message(
            magic=0, attributes=0, key=None, value=prod.serializer(payload)
        )
        for payload in ("foo", "bar")
    ]
    expected_request = produce.ProduceRequest(
        required_acks=-1,
        timeout=500,
        topics=[
            produce.TopicRequest(
                name="test.topic",
                partitions=[
                    produce.PartitionRequest(
                        partition_id=0,
                        message_set=messages.MessageSet.compressed(
                            compression=None, msgs=expected_msgs
                        ),
                    )
                ],
            )
        ],
    )
    self.assert_sent(broker_id=1, request=expected_request)
def test_unretriable_error(self):
    """
    A produce that fails with an unretriable error is dropped — only the
    original attempt and the subsequent message's request go out.
    """
    self.add_topic("test.topic", leaders=(1,))

    def single_partition_response(error_code, offset):
        # Response covering just partition 0 of test.topic.
        return produce.ProduceResponse(
            topics=[
                produce.TopicResponse(
                    name="test.topic",
                    partitions=[
                        produce.PartitionResponse(
                            partition_id=0,
                            error_code=error_code,
                            offset=offset,
                        ),
                    ],
                ),
            ]
        )

    # First produce fails with an unknown (unretriable) error, second is ok.
    self.set_responses(
        broker_id=1,
        api="produce",
        responses=[
            single_partition_response(errors.unknown, 8001),
            single_partition_response(errors.no_error, 8001),
        ],
    )

    prod = producer.Producer(["kafka01"], batch_size=1)
    for payload in ("foo", "bar"):
        yield prod.produce("test.topic", payload)

    # Exactly one request per message — the failed "foo" was not re-sent.
    self.assertEqual(len(self.requests_by_broker[1]), 2)
    self.assert_sent(
        broker_id=1,
        request=produce.ProduceRequest(
            required_acks=-1,
            timeout=500,
            topics=[
                produce.TopicRequest(
                    name="test.topic",
                    partitions=[
                        produce.PartitionRequest(
                            partition_id=0,
                            message_set=messages.MessageSet.compressed(
                                compression=None,
                                msgs=[
                                    messages.Message(
                                        magic=0,
                                        attributes=0,
                                        key=None,
                                        value=prod.serializer("bar"),
                                    ),
                                ],
                            ),
                        )
                    ],
                )
            ],
        ),
    )
def test_routing_to_partitions(self):
    """
    Keyed messages are routed to the partition chosen by the custom
    partitioner and sent to that partition's leader broker.
    """
    # Partitions 0 and 1 lead by broker 1, partition 2 by 8, partition 3 by 3.
    self.add_topic("test.topic", leaders=(1, 1, 8, 3))

    def ok_response(partition_id, offset):
        # Successful response for a single partition of test.topic.
        return produce.ProduceResponse(
            topics=[
                produce.TopicResponse(
                    name="test.topic",
                    partitions=[
                        produce.PartitionResponse(
                            partition_id=partition_id,
                            error_code=errors.no_error,
                            offset=offset,
                        ),
                    ],
                ),
            ]
        )

    self.set_responses(
        broker_id=1,
        api="produce",
        responses=[
            ok_response(0, 8000),
            ok_response(1, 8000),
            ok_response(0, 8001),
        ],
    )
    self.set_responses(
        broker_id=3, api="produce", responses=[ok_response(3, 8000)]
    )

    prod = producer.Producer(
        ["kafka01"], key_maker=attribute_key, partitioner=key_partitioner
    )
    for payload in [
        {"key": 0, "msg": "foo"},
        {"key": 1, "msg": "bar"},
        {"key": 3, "msg": "bwee"},
        {"key": 0, "msg": "bwoo"},
    ]:
        yield prod.produce("test.topic", payload)

    def expected_request(partition_id, key, msg):
        # The single-message request we expect for one produced payload.
        return produce.ProduceRequest(
            required_acks=-1,
            timeout=500,
            topics=[
                produce.TopicRequest(
                    name="test.topic",
                    partitions=[
                        produce.PartitionRequest(
                            partition_id=partition_id,
                            message_set=messages.MessageSet.compressed(
                                compression=None,
                                msgs=[
                                    messages.Message(
                                        magic=0,
                                        attributes=0,
                                        key=key,
                                        value=prod.serializer(
                                            {"msg": msg, "key": key}
                                        ),
                                    )
                                ],
                            ),
                        )
                    ],
                )
            ],
        )

    # key 0 and key 1 route to broker 1; key 3 routes to broker 3.
    self.assert_sent(broker_id=1, request=expected_request(0, 0, "foo"))
    self.assert_sent(broker_id=1, request=expected_request(1, 1, "bar"))
    self.assert_sent(broker_id=1, request=expected_request(0, 0, "bwoo"))
    self.assert_sent(broker_id=3, request=expected_request(3, 3, "bwee"))
def test_gzip_compression(self):
    """
    With GZIP compression enabled, a full batch is sent as a single
    compressed message set (unicode payloads included).
    """
    self.add_topic("test.topic", leaders=(1,))
    self.set_responses(
        broker_id=1,
        api="produce",
        responses=[
            produce.ProduceResponse(
                topics=[
                    produce.TopicResponse(
                        name="test.topic",
                        partitions=[
                            produce.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                offset=8003,
                            ),
                        ],
                    ),
                ]
            ),
        ],
    )

    prod = producer.Producer(
        ["kafka01"], batch_size=2, compression=constants.GZIP
    )
    payloads = [u"foö", "bar"]
    for payload in payloads:
        yield prod.produce("test.topic", payload)

    # Both payloads land in one GZIP-compressed message set.
    expected_set = messages.MessageSet.compressed(
        compression=constants.GZIP,
        msgs=[
            messages.Message(
                magic=0,
                attributes=0,
                key=None,
                value=prod.serializer(payload),
            )
            for payload in payloads
        ],
    )
    self.assert_sent(
        broker_id=1,
        request=produce.ProduceRequest(
            required_acks=-1,
            timeout=500,
            topics=[
                produce.TopicRequest(
                    name="test.topic",
                    partitions=[
                        produce.PartitionRequest(
                            partition_id=0, message_set=expected_set
                        )
                    ],
                )
            ],
        ),
    )
def test_default_routing(self, mock_random):
    """
    With no partitioner given, the producer picks partitions via
    random.choice; each message goes to the chosen partition's leader.
    """
    self.add_topic("test.topic", leaders=(1, 8))

    # Force random.choice to return partition 0 first, then partition 1.
    choices_to_make = [0, 1]
    mock_random.choice.side_effect = lambda *args: choices_to_make.pop(0)

    def ok_response(partition_id):
        # Successful response for a single partition of test.topic.
        return produce.ProduceResponse(
            topics=[
                produce.TopicResponse(
                    name="test.topic",
                    partitions=[
                        produce.PartitionResponse(
                            partition_id=partition_id,
                            error_code=errors.no_error,
                            offset=8000,
                        ),
                    ],
                ),
            ]
        )

    self.set_responses(
        broker_id=1, api="produce", responses=[ok_response(0)]
    )
    self.set_responses(
        broker_id=8, api="produce", responses=[ok_response(1)]
    )

    prod = producer.Producer(["kafka01"])
    for payload in ("foo", "bar"):
        yield prod.produce("test.topic", payload)

    def expected_request(partition_id, payload):
        # The single-message request expected for one produced payload.
        return produce.ProduceRequest(
            required_acks=-1,
            timeout=500,
            topics=[
                produce.TopicRequest(
                    name="test.topic",
                    partitions=[
                        produce.PartitionRequest(
                            partition_id=partition_id,
                            message_set=messages.MessageSet.compressed(
                                compression=None,
                                msgs=[
                                    messages.Message(
                                        magic=0,
                                        attributes=0,
                                        key=None,
                                        value=prod.serializer(payload),
                                    )
                                ],
                            ),
                        )
                    ],
                )
            ],
        )

    # Partition 0 is led by broker 1, partition 1 by broker 8.
    self.assert_sent(broker_id=1, request=expected_request(0, "foo"))
    self.assert_sent(broker_id=8, request=expected_request(1, "bar"))