def test_get_leader_exceptions_when_noleader(self, protocol, conn):
    """Leader lookups raise for leaderless/unknown partitions, then succeed
    once the decoded metadata response names a leader for each partition."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, "broker_1", 4567, None),
        BrokerMetadata(1, "broker_2", 5678, None),
    ]
    # Only the first three fields of each broker go into the v0 response.
    first_three = itemgetter(0, 1, 2)
    resp0_brokers = [first_three(broker) for broker in brokers]

    leaderless = [
        (NO_ERROR, "topic_noleader", [
            (NO_LEADER, 0, -1, [], []),
            (NO_LEADER, 1, -1, [], []),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, leaderless)

    client = SimpleClient(hosts=["broker_1:4567"])
    expected_map = {
        TopicPartition("topic_noleader", 0): None,
        TopicPartition("topic_noleader", 1): None,
    }
    self.assertDictEqual(expected_map, client.topics_to_brokers)

    # Known partitions without a leader -> LeaderNotAvailableError
    for partition in (0, 1):
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition("topic_noleader", partition))

    # A partition absent from the metadata -> UnknownTopicOrPartitionError
    with self.assertRaises(UnknownTopicOrPartitionError):
        self.assertIsNone(client._get_leader_for_partition("topic_noleader", 2))

    # Swap in a response where both partitions have leaders.
    with_leaders = [
        (NO_ERROR, "topic_noleader", [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
            (NO_ERROR, 1, 1, [1, 0], [1, 0]),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, with_leaders)
    for partition, expected_leader in enumerate(brokers):
        self.assertEqual(expected_leader, client._get_leader_for_partition("topic_noleader", partition))
def test_get_leader_for_unassigned_partitions(self, protocol, conn):
    """Topics reported with an error and no partitions raise on leader lookup."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in broker_list]

    topic_metadata = [
        (NO_LEADER, 'topic_no_partitions', []),
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    # Neither topic contributed any partition to the leader map.
    self.assertDictEqual({}, client.topics_to_brokers)

    expectations = (
        (LeaderNotAvailableError, 'topic_no_partitions'),
        (UnknownTopicOrPartitionError, 'topic_unknown'),
    )
    for expected_error, topic_name in expectations:
        with self.assertRaises(expected_error):
            client._get_leader_for_partition(topic_name, 0)
def test_send_produce_request_raises_when_noleader(self, protocol, conn):
    """Producing to a partition without a leader raises FailedPayloadsError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in broker_list]

    leaderless_partitions = [
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ]
    topic_metadata = [(NO_ERROR, 'topic_noleader', leaderless_partitions)]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_noleader", 0, messages)]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(payloads)
def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
    """Leader lookup for a topic missing from the cache picks up new metadata."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, "broker_1", 4567, None),
        BrokerMetadata(1, "broker_2", 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in brokers]

    initial_topics = [(NO_LEADER, "topic_no_partitions", [])]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, initial_topics)

    client = SimpleClient(hosts=["broker_1:4567"])

    # Metadata was loaded at construction, but it holds no partitions.
    self.assertDictEqual({}, client.topics_to_brokers)

    refreshed_topics = [
        (NO_ERROR, "topic_one_partition", [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, refreshed_topics)

    # _get_leader_for_partition (used by any broker-aware request) is
    # expected to load metadata again for the requested topic.
    found = client._get_leader_for_partition("topic_one_partition", 0)

    self.assertEqual(brokers[0], found)
    expected_map = {TopicPartition("topic_one_partition", 0): brokers[0]}
    self.assertDictEqual(expected_map, client.topics_to_brokers)
def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
    """Leader lookup for a topic missing from the cache picks up new metadata."""
    mock_conn(conn)

    first_broker = BrokerMetadata(0, 'broker_1', 4567)
    second_broker = BrokerMetadata(1, 'broker_2', 5678)
    brokers = [first_broker, second_broker]

    initial_topics = [(NO_LEADER, 'topic_no_partitions', [])]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, initial_topics)

    client = SimpleClient(hosts=['broker_1:4567'])

    # Metadata was loaded at construction, but it holds no partitions.
    self.assertDictEqual({}, client.topics_to_brokers)

    refreshed_topics = [
        (NO_ERROR, 'topic_one_partition', [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, refreshed_topics)

    # _get_leader_for_partition (used by any broker-aware request) is
    # expected to load metadata again for the requested topic.
    found = client._get_leader_for_partition('topic_one_partition', 0)

    self.assertEqual(brokers[0], found)
    expected_map = {TopicPartition('topic_one_partition', 0): brokers[0]}
    self.assertDictEqual(expected_map, client.topics_to_brokers)
def test_correlation_rollover(self):
    """The correlation id counts up from the seed and then wraps to 0."""
    # Metadata loading is patched out so the constructor makes no requests.
    with patch.object(SimpleClient, 'load_metadata_for_topics'):
        seed = 2**31 - 3
        client = SimpleClient(hosts=(), correlation_id=seed)

        for expected_id in (seed + 1, seed + 2, 0):
            self.assertEqual(expected_id, client._next_id())
def test_has_metadata_for_topic(self, protocol, conn):
    """has_metadata_for_topic is True only for topics that list partitions."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in broker_list]

    leaderless_partitions = [
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ]
    topic_metadata = [
        (NO_LEADER, 'topic_still_creating', []),
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
        (NO_ERROR, 'topic_noleaders', leaderless_partitions),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    # Topics without any partition entries are reported as missing.
    for partitionless in ('topic_still_creating', 'topic_doesnt_exist'):
        self.assertFalse(client.has_metadata_for_topic(partitionless))

    # Partition metadata is enough, even when no partition has a leader.
    self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
def assert_message_count(self, topic, check_count, timeout=10, partitions=None, at_least=False):
    """Poll the pending-message count until it reaches ``check_count``.

    Fails the test when fewer than ``check_count`` messages are pending once
    polling stops, or when more are pending and ``at_least`` is false.
    Returns True otherwise.
    """
    host_ports = ['%s:%d' % (b.host, b.port) for b in self.brokers]
    hosts = ','.join(host_ports)

    client = SimpleClient(hosts, timeout=2)
    consumer = SimpleConsumer(
        client, None, topic,
        partitions=partitions,
        auto_commit=False,
        iter_timeout=timeout,
    )

    started_at = time.time()
    pending = -1
    while pending < check_count and (time.time() - started_at < timeout):
        try:
            pending = consumer.pending(partitions)
        except FailedPayloadsError:
            # Keep the previous count and poll again.
            pass
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    if not at_least and pending > check_count:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))
    return True
def test_send_broker_unaware_request(self):
    """A broker-unaware request returns the reply from the one usable mock."""
    addr_bad_a = ('kafka01', 9092)
    addr_good = ('kafka02', 9092)
    addr_bad_b = ('kafka03', 9092)
    mocked_conns = {
        addr_bad_a: MagicMock(),
        addr_good: MagicMock(),
        addr_bad_b: MagicMock(),
    }

    # inject BrokerConnection side effects
    mock_conn(mocked_conns[addr_bad_a], success=False)
    mock_conn(mocked_conns[addr_bad_b], success=False)

    future = Future()
    good_conn = mocked_conns[addr_good]
    good_conn.send.return_value = future
    good_conn.recv.return_value = [('valid response', future)]

    def mock_get_conn(host, port, afi):
        return mocked_conns[(host, port)]

    # Patch metadata loading so no request is made before the one under test.
    with patch.object(SimpleClient, 'load_metadata_for_topics'), \
            patch.object(SimpleClient, '_get_conn', side_effect=mock_get_conn):
        client = SimpleClient(hosts='kafka01:9092,kafka02:9092')
        resp = client._send_broker_unaware_request(
            payloads=['fake request'],
            encoder_fn=MagicMock(),
            decoder_fn=lambda x: x,
        )

        self.assertEqual('valid response', resp)
        good_conn.recv.assert_called_once_with()
class KafkaIntegrationTestCase(unittest.TestCase):
    """Base class for tests that run against a live Kafka fixture.

    Subclasses are expected to provide ``server`` (and ``zk``) fixtures
    exposing ``host``/``port`` and ``child.dump_logs()``.
    """

    # When true, setUp creates ``self.client`` and tearDown closes it.
    create_client = True
    # Topic name; derived from the test id plus a random suffix when unset.
    topic = None
    zk = None
    server = None

    def setUp(self):
        """Skip without KAFKA_VERSION; otherwise prepare topic and client."""
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        # Cache of generated message bodies, keyed by the short label.
        self._messages = {}

    def tearDown(self):
        """Close the client created in setUp, if any."""
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        """Return the first offset reported by an offset request (time=-1).

        On failure the zookeeper and server fixture logs are dumped before
        the original exception is re-raised.
        """
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        """Return ``self.msg(x)`` for every item of ``iterable``."""
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        """Return a stable, unique UTF-8 message body for label ``s``."""
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        """Return ``k`` encoded as UTF-8 bytes."""
        return k.encode('utf-8')
def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):
    """Producing to an unknown topic raises FailedPayloadsError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, "broker_1", 4567, None),
        BrokerMetadata(1, "broker_2", 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in broker_list]

    topic_metadata = [(UNKNOWN_TOPIC_OR_PARTITION, "topic_doesnt_exist", [])]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topic_metadata)

    client = SimpleClient(hosts=["broker_1:4567"])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_doesnt_exist", 0, messages)]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(payloads)
def test_ensure_topic_exists(self, decode_metadata_response, conn):
    """ensure_topic_exists raises for unknown/partitionless topics only."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    resp0_brokers = [itemgetter(0, 1, 2)(b) for b in broker_list]

    leaderless_partitions = [
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ]
    topic_metadata = [
        (NO_LEADER, 'topic_still_creating', []),
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
        (NO_ERROR, 'topic_noleaders', leaderless_partitions),
    ]
    decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    failing_cases = (
        (UnknownTopicOrPartitionError, 'topic_doesnt_exist'),
        (KafkaTimeoutError, 'topic_still_creating'),
    )
    for expected_error, topic_name in failing_cases:
        with self.assertRaises(expected_error):
            client.ensure_topic_exists(topic_name, timeout=1)

    # A topic with partition metadata must pass, leaders or not.
    client.ensure_topic_exists('topic_noleaders', timeout=1)
def test_get_leader_exceptions_when_noleader(self, protocol, conn):
    """Leader lookups raise for leaderless/unknown partitions, then succeed
    once the decoded metadata response names a leader for each partition."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678),
    ]

    leaderless = [
        (NO_ERROR, 'topic_noleader', [
            (NO_LEADER, 0, -1, [], []),
            (NO_LEADER, 1, -1, [], []),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, leaderless)

    client = SimpleClient(hosts=['broker_1:4567'])
    expected_map = {
        TopicPartition('topic_noleader', 0): None,
        TopicPartition('topic_noleader', 1): None,
    }
    self.assertDictEqual(expected_map, client.topics_to_brokers)

    # Known partitions without a leader -> LeaderNotAvailableError
    for partition in (0, 1):
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition('topic_noleader', partition))

    # A partition absent from the metadata -> UnknownTopicOrPartitionError
    with self.assertRaises(UnknownTopicOrPartitionError):
        self.assertIsNone(client._get_leader_for_partition('topic_noleader', 2))

    # Swap in a response where both partitions have leaders.
    with_leaders = [
        (NO_ERROR, 'topic_noleader', [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
            (NO_ERROR, 1, 1, [1, 0], [1, 0]),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(brokers, with_leaders)
    for partition, expected_leader in enumerate(brokers):
        self.assertEqual(expected_leader, client._get_leader_for_partition('topic_noleader', partition))
def topic_offsets(kafka_brokers, topic):
    """Return a ``{TopicPartition: offset}`` mapping for ``topic``.

    One offset request (time=-1, max one offset) is sent per partition the
    client knows for the topic. A falsy first offset is recorded as 0.

    Raises:
        KafkaException: if ``topic`` is not in the client's topic metadata.
    """
    client = SimpleClient(insure_is_array(kafka_brokers))
    try:
        topic_partitions = client.topic_partitions
        if topic not in topic_partitions:
            raise KafkaException("topic {} doesn't exists".format(topic))

        offset_requests = [
            OffsetRequestPayload(topic, partition, -1, 1)
            for partition in topic_partitions[topic]
        ]
        offsets_responses = client.send_offset_request(offset_requests)
    finally:
        # Close the client on every path, including the unknown-topic error
        # and a failing offset request.
        client.close()

    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic != topic:
            continue
        topic_partition = TopicPartition(topic=offset.topic, partition=offset.partition)
        partitions_and_offsets[topic_partition] = offset.offsets[0] or 0

    return partitions_and_offsets
def test_send_broker_unaware_request_fail(self, load_metadata, conn):
    """KafkaUnavailableError is raised when every mocked connection fails,
    and each connection was still sent the encoded request."""
    mocked_conns = {
        ('kafka01', 9092): MagicMock(),
        ('kafka02', 9092): MagicMock(),
    }
    for failing_conn in mocked_conns.values():
        mock_conn(failing_conn, success=False)

    def mock_get_conn(host, port, afi):
        return mocked_conns[(host, port)]

    conn.side_effect = mock_get_conn

    client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092'])

    with self.assertRaises(KafkaUnavailableError):
        client._send_broker_unaware_request(
            payloads=['fake request'],
            encoder_fn=MagicMock(return_value='fake encoded message'),
            decoder_fn=lambda x: x,
        )

    # Use a distinct loop name so the ``conn`` parameter is not shadowed.
    for failing_conn in mocked_conns.values():
        failing_conn.send.assert_called_with('fake encoded message')
def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):
    """Producing to an unknown topic raises FailedPayloadsError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678),
    ]
    topic_metadata = [(UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', [])]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](broker_list, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_doesnt_exist", 0, messages)]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(payloads)
def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):
    """Producing to an unknown topic raises UnknownTopicOrPartitionError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678),
    ]
    topic_metadata = [(UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', [])]
    protocol.decode_metadata_response.return_value = MetadataResponse(broker_list, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_doesnt_exist", 0, messages)]

    with self.assertRaises(UnknownTopicOrPartitionError):
        client.send_produce_request(payloads)
def create_topic(self):
    """Create ``self.TOPIC_NAME`` on the cluster unless it already exists.

    Does nothing when ``TOPIC_NAME`` is falsy. Prints a notice when the topic
    is already present in the broker's topic metadata.

    Raises:
        Exception: 'Unable to create topic' when the admin call fails.
    """
    # The SimpleClient is only needed to read the known topics; close it as
    # soon as the mapping has been fetched.
    client = SimpleClient(self.BOOTSTRAP_SERVER)
    try:
        broker_topics = client.topic_partitions
    finally:
        client.close()

    if not self.TOPIC_NAME:
        return

    if self.TOPIC_NAME in broker_topics:
        print('Topic already created')
        return

    topic_list = [NewTopic(name=self.TOPIC_NAME,
                           num_partitions=self.NUM_PARTITIONS,
                           replication_factor=self.REPLICATION_FACTOR)]
    # Build the admin client only when a topic actually has to be created.
    admin_client = KafkaAdminClient(bootstrap_servers=self.BOOTSTRAP_SERVER, client_id='test')
    try:
        admin_client.create_topics(new_topics=topic_list, validate_only=False)
    except Exception:
        raise Exception('Unable to create topic')
    finally:
        admin_client.close()
def test_load_metadata(self, protocol, conn):
    """Metadata loaded at init fills topics_to_brokers; explicit loads raise
    for topics that were reported with a topic-level error."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None)
    ]
    # Only the first three fields of each broker go into the v0 response.
    resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

    topics = [
        (NO_ERROR, 'topic_1', [
            (NO_ERROR, 0, 1, [1, 2], [1, 2])
        ]),
        (NO_ERROR, 'topic_noleader', [
            (NO_LEADER, 0, -1, [], []),
            (NO_LEADER, 1, -1, [], []),
        ]),
        (NO_LEADER, 'topic_no_partitions', []),
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
        (NO_ERROR, 'topic_3', [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
            (NO_ERROR, 1, 1, [1, 0], [1, 0]),
            (NO_ERROR, 2, 0, [0, 1], [0, 1])
        ])
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

    # client loads metadata at init
    client = SimpleClient(hosts=['broker_1:4567'])
    self.assertDictEqual({
        TopicPartition('topic_1', 0): brokers[1],
        TopicPartition('topic_noleader', 0): None,
        TopicPartition('topic_noleader', 1): None,
        TopicPartition('topic_3', 0): brokers[0],
        TopicPartition('topic_3', 1): brokers[1],
        TopicPartition('topic_3', 2): brokers[0]},
        client.topics_to_brokers)

    # if we ask for metadata explicitly, it should raise errors
    with self.assertRaises(LeaderNotAvailableError):
        client.load_metadata_for_topics('topic_no_partitions')

    with self.assertRaises(UnknownTopicOrPartitionError):
        client.load_metadata_for_topics('topic_unknown')

    # This should not raise. The name must match the fixture topic
    # ('topic_noleader') so the leaderless-partition path is what gets
    # exercised, not a topic absent from the response.
    client.load_metadata_for_topics('topic_noleader')
def setUp(self):
    """Start a zookeeper fixture plus three Kafka brokers and connect to them."""
    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        self.skipTest('integration test requires KAFKA_VERSION')

    zk_chroot = random_string(10)
    replicas = 3
    partitions = 3

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    kk_kwargs = dict(zk_chroot=zk_chroot, replicas=replicas, partitions=partitions)
    self.brokers = []
    for broker_id in range(replicas):
        self.brokers.append(KafkaFixture.instance(broker_id, self.zk, **kk_kwargs))

    hosts = ['%s:%d' % (broker.host, broker.port) for broker in self.brokers]
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def test_send_broker_unaware_request(self):
    """A broker-unaware request returns the reply from the one usable mock."""
    addr_bad_a = ("kafka01", 9092)
    addr_good = ("kafka02", 9092)
    addr_bad_b = ("kafka03", 9092)
    mocked_conns = {
        addr_bad_a: MagicMock(),
        addr_good: MagicMock(),
        addr_bad_b: MagicMock(),
    }

    # inject BrokerConnection side effects
    mock_conn(mocked_conns[addr_bad_a], success=False)
    mock_conn(mocked_conns[addr_bad_b], success=False)

    future = Future()
    good_conn = mocked_conns[addr_good]
    good_conn.send.return_value = future
    # recv() resolves the pending future with the canned response.
    good_conn.recv.side_effect = lambda: future.success("valid response")

    def mock_get_conn(host, port, afi):
        return mocked_conns[(host, port)]

    # Patch metadata loading so no request is made before the one under test.
    with patch.object(SimpleClient, "load_metadata_for_topics"), \
            patch.object(SimpleClient, "_get_conn", side_effect=mock_get_conn):
        client = SimpleClient(hosts="kafka01:9092,kafka02:9092")
        resp = client._send_broker_unaware_request(
            payloads=["fake request"],
            encoder_fn=MagicMock(),
            decoder_fn=lambda x: x,
        )

        self.assertEqual("valid response", resp)
        good_conn.recv.assert_called_once_with()
def replicationproducer(self):
    """Obtain a ``Producer`` instance to write to the replication log.

    The producer is built on first use and cached on the instance as
    ``_replicationproducer``.
    """
    if getattr(self, '_replicationproducer', None):
        return self._replicationproducer

    kafka_client = SimpleClient(hosts, client_id=clientid, timeout=timeout)
    self._replicationproducer = vcsrproducer.Producer(
        kafka_client,
        topic,
        batch_send=False,
        req_acks=reqacks,
        ack_timeout=acktimeout,
    )
    return self._replicationproducer
def _get_highwater_offsets(self, kafka_hosts_ports):
    """
    Fetch highwater offsets for each topic/partition from the Kafka cluster.

    Every partition the client knows about is queried, including ones with
    no consumers, so that producer progress can still be measured.

    Returns a dict mapping ``(topic, partition)`` to the first offset of the
    matching offset response.
    """
    kafka_conn = SimpleClient(kafka_hosts_ports, timeout=self.kafka_timeout)
    try:
        # Build all requests up front so they go out in a single call.
        offsets_request = []
        for topic, partition in kafka_conn.topics_to_brokers.keys():
            offsets_request.append(OffsetRequestPayload(topic, partition, -1, 1))

        offsets_response = kafka_conn.send_offset_request(offsets_request)

        highwater_offsets = {}
        for response in offsets_response:
            highwater_offsets[(response.topic, response.partition)] = response.offsets[0]
    finally:
        # Closing is best effort: a failure here is logged, not raised.
        try:
            kafka_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Kafka connection')

    return highwater_offsets
def __init__(self, queue, kf_ip_port='localhost', zk_ip_port='localhost', sleep_time=10):
    """Create the Kafka and ZooKeeper clients and store the polling settings.

    ``sleep_time`` is stored minus one.
    """
    # Kafka connection
    self.kafka_hosts = kf_ip_port
    kafka_client = SimpleClient(hosts=self.kafka_hosts)
    self.broker = kafka_client

    # ZooKeeper connection (read-only client)
    self.zookeepers_hosts = zk_ip_port
    zk_client = KazooClient(hosts=self.zookeepers_hosts, read_only=True)
    self.zk = zk_client

    # Where collected data is stored
    self.queue = queue

    # Interval between runs
    self.sleep_time = sleep_time - 1
def test_send_produce_request_raises_when_noleader(self, protocol, conn):
    """Producing to a partition without a leader raises FailedPayloadsError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678),
    ]
    leaderless_partitions = [
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ]
    topic_metadata = [(NO_ERROR, 'topic_noleader', leaderless_partitions)]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](broker_list, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_noleader", 0, messages)]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(payloads)
def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
    """Leader lookup for a topic missing from the cache picks up new metadata."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    first_three = itemgetter(0, 1, 2)
    resp0_brokers = [first_three(broker) for broker in brokers]

    initial_topics = [(NO_LEADER, 'topic_no_partitions', [])]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, initial_topics)

    client = SimpleClient(hosts=['broker_1:4567'])

    # Metadata was loaded at construction, but it holds no partitions.
    self.assertDictEqual({}, client.topics_to_brokers)

    refreshed_topics = [
        (NO_ERROR, 'topic_one_partition', [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, refreshed_topics)

    # _get_leader_for_partition (used by any broker-aware request) is
    # expected to load metadata again for the requested topic.
    found = client._get_leader_for_partition('topic_one_partition', 0)

    self.assertEqual(brokers[0], found)
    expected_map = {TopicPartition('topic_one_partition', 0): brokers[0]}
    self.assertDictEqual(expected_map, client.topics_to_brokers)
def assert_message_count(self, topic, check_count, timeout=10, partitions=None, at_least=False):
    """Poll the pending-message count until it reaches ``check_count``.

    Fails the test when fewer than ``check_count`` messages are pending once
    polling stops, or when more are pending and ``at_least`` is false.
    Returns True otherwise.
    """
    endpoints = []
    for broker in self.brokers:
        endpoints.append('%s:%d' % (broker.host, broker.port))
    hosts = ','.join(endpoints)

    client = SimpleClient(hosts, timeout=2)
    consumer = SimpleConsumer(
        client, None, topic,
        partitions=partitions,
        auto_commit=False,
        iter_timeout=timeout,
    )

    started_at = time.time()
    pending = -1
    while pending < check_count and (time.time() - started_at < timeout):
        try:
            pending = consumer.pending(partitions)
        except FailedPayloadsError:
            # Keep the previous count and poll again.
            pass
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    if not at_least and pending > check_count:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))
    return True
def setUp(self):
    """Skip without KAFKA_VERSION; otherwise prepare the topic and client."""
    super(KafkaIntegrationTestCase, self).setUp()

    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        self.skipTest('Integration test requires KAFKA_VERSION')

    if not self.topic:
        # Default topic: "<test method name>-<random suffix>".
        topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
        self.topic = topic

    if self.create_client:
        address = '%s:%d' % (self.server.host, self.server.port)
        self.client = SimpleClient(address)

    self.client.ensure_topic_exists(self.topic)

    # Cache of generated message bodies.
    self._messages = {}
def test_send_produce_request_raises_when_noleader(self, protocol, conn):
    """Producing to a partition without a leader raises LeaderNotAvailableError."""
    mock_conn(conn)

    broker_list = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678),
    ]
    leaderless_partitions = [
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ]
    topic_metadata = [(NO_ERROR, 'topic_noleader', leaderless_partitions)]
    protocol.decode_metadata_response.return_value = MetadataResponse(broker_list, topic_metadata)

    client = SimpleClient(hosts=['broker_1:4567'])

    messages = [create_message("a"), create_message("b")]
    payloads = [ProduceRequestPayload("topic_noleader", 0, messages)]

    with self.assertRaises(LeaderNotAvailableError):
        client.send_produce_request(payloads)
def send_to_kafka(message):
    """Send ``message`` to ``settings.KAFKA_TOPIC``, retrying once.

    If the first send fails, the topic is ensured to exist via a short-lived
    ``SimpleClient`` and the send is attempted a second time. A failure of
    the second attempt propagates to the caller. The producer is closed (with
    the original timeout argument of 10) on every path.
    """
    producer = get_producer()
    try:
        try:
            producer.send(settings.KAFKA_TOPIC, message)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed by the retry.
            client = SimpleClient(hosts=settings.KAFKA_SERVERS)
            try:
                client.ensure_topic_exists(settings.KAFKA_TOPIC)
            finally:
                client.close()
            producer.send(settings.KAFKA_TOPIC, message)
    finally:
        producer.close(10)
def getTopics(self, once_sleep=60):
    '''
    Continuously refresh ``self.topics``, the list of topics to consume.

    In debug mode the topics come from the ``debugconf.debug_topic`` config
    entry. Otherwise they are the intersection of the topics known to Kafka
    and the appkeys returned by the ``saas_server`` MySQL client, expanded to
    one ``(topic, logpath)`` pair per log path.

    This method loops forever and never returns.

    :param once_sleep: seconds to wait between refreshes
    '''
    while True:
        if self.debug:
            debug_topic = self.configures.get("debugconf", "debug_topic")
            self.topics = [(item, "debug") for item in debug_topic.split(",")]
        else:
            try:
                from kafka import SimpleClient
                hostname = self.configures.get("kafka", "hostname")
                kafka_topics = set()
                client = SimpleClient(hosts=hostname)
                try:
                    kafka_topics.update(client.topics)
                finally:
                    client.close()
                log.info("get kafka topics: %s" % json.dumps(list(kafka_topics)))

                saas_appkey = set()
                appkey_logpath = {}
                client = MysqlClient("saas_server")
                try:
                    for topic, logpath in client.getTopics(group_id=self.group_id):
                        saas_appkey.add(topic)
                        appkey_logpath.setdefault(topic, set()).add(logpath)
                finally:
                    client.closeMysql()
                log.info("get mysql appkeys: %s" % json.dumps(list(saas_appkey)))
            except Exception:
                # Keep the previous topic list. Falling through to the sleep
                # below avoids retrying in a tight loop while Kafka or MySQL
                # is unavailable.
                logging.error(sys.exc_info())
            else:
                self.topics = [(topic, logpath)
                               for topic in list(kafka_topics & saas_appkey)
                               for logpath in appkey_logpath[topic]]
                # NOTE(review): the original layout was lost; this log line
                # may also have run in debug mode. Confirm.
                log.info("current topics: %s" % json.dumps(self.topics))
        time.sleep(once_sleep)
def setUp(self):
    """Start a zookeeper fixture plus three Kafka brokers and connect to them."""
    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        self.skipTest('integration test requires KAFKA_VERSION')

    zk_chroot = random_string(10)
    replicas = 3
    partitions = 3

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
    self.brokers = []
    for broker_id in range(replicas):
        self.brokers.append(KafkaFixture.instance(broker_id, *kk_args))

    hosts = ['%s:%d' % (broker.host, broker.port) for broker in self.brokers]
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def save_to_kafka(taobao_total_data):
    """Serialize ``taobao_total_data`` as JSON and publish it to Kafka.

    The send is attempted at most twice. Every failure is logged through
    ``logger`` and nothing is raised to the caller, so delivery stays best
    effort as before.
    """
    # Serialize once, outside the retry loop. The original re-assigned the
    # argument to its JSON string inside the loop, so a retry sent a
    # double-encoded payload.
    try:
        payload = json.dumps(taobao_total_data).encode("utf8")
    except Exception as e:
        logger.error(e)
        return

    max_attempts = 2
    for _ in range(max_attempts):
        kafka_client = None
        try:
            kafka_client = SimpleClient('{}:{}'.format(settings.KAFKA_IP, settings.KAFKA_PORT))
            producer = SimpleProducer(kafka_client)
            producer.send_messages('{}'.format(settings.KAFKA_TOPIC), payload)
            return
        except Exception as e:
            logger.error(e)
        finally:
            # Close the client created for this attempt.
            if kafka_client is not None:
                kafka_client.close()
def test_get_leader_exceptions_when_noleader(self, protocol, conn):
    """Leader lookups raise for leaderless/unknown partitions, then succeed
    once the decoded metadata response names a leader for each partition."""
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None),
    ]
    first_three = itemgetter(0, 1, 2)
    resp0_brokers = [first_three(broker) for broker in brokers]

    def install_metadata(partition_entries):
        # Make the mocked decoder return one-topic metadata for 'topic_noleader'.
        topics = [(NO_ERROR, 'topic_noleader', partition_entries)]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](
            resp0_brokers, topics)

    install_metadata([
        (NO_LEADER, 0, -1, [], []),
        (NO_LEADER, 1, -1, [], []),
    ])

    client = SimpleClient(hosts=['broker_1:4567'])
    self.assertDictEqual(
        {
            TopicPartition('topic_noleader', 0): None,
            TopicPartition('topic_noleader', 1): None,
        },
        client.topics_to_brokers)

    # Known partitions without a leader -> LeaderNotAvailableError
    for partition in (0, 1):
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(
                client._get_leader_for_partition('topic_noleader', partition))

    # A partition absent from the metadata -> UnknownTopicOrPartitionError
    with self.assertRaises(UnknownTopicOrPartitionError):
        self.assertIsNone(
            client._get_leader_for_partition('topic_noleader', 2))

    # Swap in a response where both partitions have leaders.
    install_metadata([
        (NO_ERROR, 0, 0, [0, 1], [0, 1]),
        (NO_ERROR, 1, 1, [1, 0], [1, 0]),
    ])
    for partition, expected_leader in enumerate(brokers):
        self.assertEqual(
            expected_leader,
            client._get_leader_for_partition('topic_noleader', partition))
def wait_for_kafka(hostport, timeout=60):
    """Wait for Kafka to start responding on the specified host:port string.

    A ``SimpleClient`` is constructed repeatedly (every 0.1s) until one
    succeeds. The successful probe client is closed before returning.

    Raises:
        Exception: 'Timeout reached waiting for Kafka' once more than
            ``timeout`` seconds have passed without a successful probe.
    """
    # Delay import to facilitate module use in limited virtualenvs.
    from kafka import SimpleClient

    start = time.time()
    while True:
        try:
            probe = SimpleClient(hostport, client_id=b'dummy', timeout=1)
        except Exception:
            # Not reachable yet; fall through to the timeout check.
            pass
        else:
            # The probe was only needed to prove connectivity; close it so
            # the connection is not leaked.
            probe.close()
            return

        if time.time() - start > timeout:
            raise Exception('Timeout reached waiting for Kafka')

        time.sleep(0.1)
def test_producer_sync_fail_on_error(self):
    """sync_fail_on_error decides whether a failed payload is returned or raised."""
    error = FailedPayloadsError('failure')

    # Patch out everything on SimpleClient the sync producer relies on; the
    # broker-aware request always yields the error.
    with patch.object(SimpleClient, 'load_metadata_for_topics'), \
            patch.object(SimpleClient, 'ensure_topic_exists'), \
            patch.object(SimpleClient, 'get_partition_ids_for_topic', return_value=[0, 1]), \
            patch.object(SimpleClient, '_send_broker_aware_request', return_value=[error]):

        client = SimpleClient(MagicMock())

        # With the flag off, the error comes back as the response.
        lenient = SimpleProducer(client, async_send=False, sync_fail_on_error=False)
        (response,) = lenient.send_messages('foobar', b'test message')
        self.assertEqual(response, error)

        # With the flag on, the same error is raised.
        strict = SimpleProducer(client, async_send=False, sync_fail_on_error=True)
        with self.assertRaises(FailedPayloadsError):
            strict.send_messages('foobar', b'test message')
def __init__(self):
    """Set up the tweepy API client and a Kafka producer from config.json.

    Raises:
        KafkaUnavailableError: re-raised after logging when the Kafka client
            cannot be created.
    """
    config = ConfigReader("config.json")

    consumer_key = config.get_key("CONSUMER_KEY")
    consumer_secret = config.get_key("CONSUMER_SECRET")
    auth = OAuthHandler(consumer_key, consumer_secret)

    access_token = config.get_key("ACCESS_TOKEN_KEY")
    access_secret = config.get_key("ACCESS_TOKEN_SECRET")
    auth.set_access_token(access_token, access_secret)

    self.api = tweepy.API(
        auth_handler=auth,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
    )
    current_limits = self.api.rate_limit_status()
    self.rate_limits = self.tweep_rate_limits_to_dictionary(current_limits)
    self.scrapers = []

    kafka_host = config.get_key("KAFKA_HOST")
    kafka_port = config.get_key("KAFKA_PORT")
    kafka_url = "{:s}:{:s}".format(kafka_host, kafka_port)
    try:
        kafka = SimpleClient(kafka_url, timeout=60)
    except KafkaUnavailableError as e:
        logging.error("Could not connect to Kafka2")
        raise e

    self.producer = SimpleProducer(kafka)
def setUp(self):
    """Skip without KAFKA_VERSION; otherwise wait until the topic is usable.

    Topic metadata is polled for up to 30 seconds, then an offset request is
    sent to every partition until it succeeds or the same deadline passes.

    Raises:
        KafkaTimeoutError: if metadata or partitions are not ready in time.
    """
    super(KafkaIntegrationTestCase, self).setUp()
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('Integration test requires KAFKA_VERSION')

    if not self.topic:
        # Default topic: "<test method name>-<random suffix>".
        self.topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))

    if self.create_client:
        self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

    timeout = time.time() + 30
    while time.time() < timeout:
        try:
            self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
            # Use self.topic here: the local ``topic`` only existed when the
            # class did not preset a topic, so a preset one raised
            # UnboundLocalError.
            if self.client.has_metadata_for_topic(self.topic):
                break
        except (LeaderNotAvailableError, InvalidTopicError):
            time.sleep(1)
    else:
        # Reached only when the loop ran out of time without a break.
        raise KafkaTimeoutError('Timeout loading topic metadata!')

    # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
    # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
    for partition in self.client.get_partition_ids_for_topic(self.topic):
        while True:
            try:
                req = OffsetRequestPayload(self.topic, partition, -1, 100)
                self.client.send_offset_request([req])
                break
            except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError):
                if time.time() > timeout:
                    raise KafkaTimeoutError('Timeout loading topic metadata!')
                time.sleep(.1)

    # Cache of generated message bodies.
    self._messages = {}
def refresh_graph_data1():
    """Refresh the sentiment result for ``username1`` and publish its tweets.

    Fetches the latest 20 timeline entries, stores the positive / neutral /
    negative values in the global ``list_result1``, publishes each tweet text
    to Kafka on a best-effort basis (failures are printed, not raised), and
    returns the result as JSON.

    Uses ``async_send=False``: ``async`` is a reserved word from Python 3.7,
    so the old ``async=False`` keyword no longer parses. This needs a
    kafka-python version that accepts ``async_send``.
    """
    global list_result1, username1
    user_timeline1 = twitter.get_user_timeline(screen_name=username1, count=20)
    result1 = sentiment_analysis(user_timeline1)
    list_result1 = [result1.pos1, result1.neut1, result1.neg1]

    for tweet in user_timeline1:
        client = None
        try:
            client = SimpleClient("localhost:9092")
            producer = SimpleProducer(client, async_send=False,
                                      batch_send_every_n=10,
                                      batch_send_every_t=2)
            print(tweet['text'])
            msg = tweet['text'].encode('utf-8')
            producer.send_messages(topic_name, msg)
            print('publish success')
        except Exception as ex:
            print('Exception in publishing message')
            print(str(ex))
        finally:
            # A client is created per tweet; close it so connections do not
            # pile up.
            if client is not None:
                client.close()

    # NOTE(review): the original layout was lost; this sleep is placed after
    # the publishing loop. Confirm it was not per tweet or per failure.
    time.sleep(30)
    return jsonify(sResult1=list_result1)
class KafkaIntegrationTestCase(unittest.TestCase):
    """Base class for tests that talk to a running Kafka broker.

    Class attributes:
        create_client: when true, ``setUp`` creates ``self.client`` and
            ``tearDown`` closes it.
        topic: topic name to use; generated in ``setUp`` when empty.
        zk, server: fixtures whose ``host`` / ``port`` / ``child`` attributes
            are read by the methods below; expected to be set by subclasses.
    """
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        """Skip without ``KAFKA_VERSION``; otherwise wait until the topic is usable.

        Raises:
            KafkaTimeoutError: if topic metadata or partitions are not
                available within 30 seconds.
        """
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            self.topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                # Check self.topic: the former local ``topic`` only existed
                # when the name was generated above, so a preset topic raised
                # UnboundLocalError here.
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError):
                    # Shares the deadline of the metadata wait above.
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        # Cache used by msg() so equal inputs map to the same payload.
        self._messages = {}

    def tearDown(self):
        """Close the client created by ``setUp`` (only when one was created)."""
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        """Return the first offset reported by an offset request for the partition.

        On any failure the zookeeper and broker logs are dumped before the
        exception is re-raised.
        """
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        """Return ``self.msg(x)`` for every item of ``iterable``."""
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        """Return a UTF-8 payload for ``s`` that is stable within this test.

        The payload combines ``s``, the test id and a random UUID; it is
        cached so repeated calls with the same ``s`` return the same bytes.
        """
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        """Return ``k`` encoded as UTF-8 bytes."""
        return k.encode('utf-8')
class FeedManager(FlaskView):
    """View exposing feed, parameter and container management endpoints.

    All clients below are created once, when the class body is executed, and
    are shared by every request.
    """
    dockerClient = docker.from_env()
    mongoClient = pymongo.MongoClient(**mongo_params)
    forms: Database = mongoClient[os.getenv("FORM_DATABASE", "forms")]
    feeds: Database = mongoClient[os.getenv("PARAMETER_DATABASE", "params")]
    parameter_stats: Database = mongoClient[os.getenv("PARAM_STATS_DATABASE", "params_stats")]
    parameterSchemas = forms['parameterSchemas']
    admin = KafkaAdminClient(**kafka_params)
    kafkaClient = SimpleClient(hosts=kafka_params.get("bootstrap_servers")[0])
    # Same database as ``feeds``; kept as a separate name for existing callers.
    feed_params: Database = mongoClient[os.getenv("PARAMETER_DATABASE", "params")]
    # Port per feed name, numbered from 8000 in the order the feeds are stored.
    feed_ports = {name.get("name"): 8000+i for (i, name) in enumerate(feeds["feed"].find({}))}

    def getParameter(self, collection, name):
        """Return the parameter document ``name`` from ``collection`` as JSON (404 if absent)."""
        params = self.feed_params[collection].find_one(filter={"name": name})
        if params is None:
            return Response(status=404)
        # The Mongo ``_id`` is dropped before the document is serialised.
        params.pop("_id")
        return Response(json.dumps(params), mimetype="application/json")

    def getParameterStatus(self, feedName):
        """Return, per parameter schema, how many stats documents exist for ``feedName``."""
        payload = []
        for schema in self.parameterSchemas.find({}):
            parameterName = schema.get("name")
            # count_documents replaces Collection.count, which newer pymongo
            # releases no longer provide.
            errors = self.parameter_stats[parameterName].count_documents({"name": feedName})
            payload.append({
                "errors": errors,
                "name": parameterName
            })
        return Response(json.dumps(payload), mimetype='application/json')

    def getParameterTypes(self):
        """Return the names of all parameter schemas as a JSON list."""
        data = [param.get("name") for param in self.parameterSchemas.find({})]
        return Response(json.dumps(data), mimetype="application/json")

    def getParameterSchema(self, parameterName):
        """Return the ``value`` of the schema ``parameterName`` as JSON (404 if absent)."""
        parameter = self.parameterSchemas.find_one({"name": parameterName})
        if parameter is None:
            # Mirrors getParameter instead of failing on ``None['value']``.
            return Response(status=404)
        return Response(json.dumps(parameter['value']), mimetype="application/json")

    @route("/setParameter/<string:collection>/<string:name>", methods=['PUT'])
    def setParameter(self, collection, name=None):
        """Store the request's JSON body as parameter ``name`` in ``collection``.

        When a document with that name already exists it is replaced, and the
        previous version is re-inserted under ``<name>_<ddmmYYYY>``.
        """
        value = request.get_json()
        param: dict = self.feed_params[collection].find_one({"name": name})
        value.update({"name": name})
        if param is None:
            self.feed_params[collection].insert_one(value)
        else:
            self.feed_params[collection].replace_one(filter={"name": name}, replacement=value)
            # Archive the previous document under a dated name; its ``_id``
            # is removed so it is inserted as a new document.
            param["name"] = "{}_{}".format(name, datetime.now().strftime("%d%m%Y"))
            param.pop("_id")
            self.feed_params[collection].insert_one(param)
        return Response("ok", status=200)

    def getFeeds(self):
        """Return the names of all stored feeds as a JSON list."""
        data = [param.get("name") for param in self.feeds["feed"].find({})]
        return Response(json.dumps(data), mimetype="application/json")

    def newFeed(self, feedName):
        """Register ``feedName``: assign it a port and store it if it is new."""
        # Keep an already assigned port: re-assigning ``8000 + len(...)`` on
        # every call could hand the same port to two different feeds.
        self.feed_ports.setdefault(feedName, 8000 + len(self.feed_ports))
        # The cursor yields documents, so the feed must be looked up by its
        # "name" field; comparing documents to the name never matched and a
        # duplicate was inserted on every call.
        if self.feeds["feed"].find_one({"name": feedName}) is None:
            self.feeds["feed"].insert_one({"name": feedName})
        return "ok"

    def startFeed(self, feedName):
        """Create the feed's topics and start (or create) its container.

        When some collection in the feeds database has no document for
        ``feedName``, nothing is started and the missing collections are
        returned as ``{"notSet": [...], "status": False}``.
        """
        logging.info("starting feed {}".format(feedName))
        parameterSets = self.feeds.list_collection_names(include_system_collections=False)
        notSet = [collection for collection in parameterSets
                  if self.feeds[collection].find_one({"name": feedName}) is None]
        if notSet:
            payload = {"notSet": notSet, "status": False}
            return Response(json.dumps(payload), mimetype='application/json')

        try:
            queues_to_make = [
                NewTopic(name="{}-results".format(feedName), num_partitions=1, replication_factor=1),
                NewTopic(name="{}-items".format(feedName), num_partitions=1, replication_factor=1),
            ]
            self.admin.create_topics(queues_to_make)
        except TopicAlreadyExistsError:
            # Topics left over from an earlier start are reused.
            pass

        try:
            feed = self.dockerClient.containers.get(feedName)
            feed.start()
        except APIError:
            # The existing container could not be fetched or started: run a
            # new one with the variables from ./docker.env.
            with open("./docker.env") as file:
                string = file.read()
            # Compare by value; ``is not ""`` tested object identity.
            env_vars = [item for item in string.split("\n") if item != ""]
            # NOTE(review): ``feed_params`` is a bare name here, not
            # ``self.feed_params``; it must resolve to a module-level mapping
            # with an 'image' key -- confirm.
            image = self.dockerClient.images.get(feed_params['image'])
            self.dockerClient.containers.run(
                image,
                environment=["NAME={}".format(feedName),
                             'BROWSER_PORT={}'.format(self.feed_ports.get(feedName))] + env_vars,
                detach=True,
                name=feedName,
                restart_policy={"Name": 'always'},
                network=os.getenv("NETWORK", "car_default"))
        return Response(json.dumps({"status": True}), status=200)

    def stopFeed(self, feedName):
        """Stop and remove the feed's container and delete its two topics."""
        feed = self.dockerClient.containers.get(feedName)
        feed.stop()
        feed.remove()
        self.admin.delete_topics(["{}-{}".format(feedName, val) for val in ("items", "results")])
        return "ok"

    def feedStatus(self, feedName):
        """Return ``{"status": bool}``: true only when the container is running."""
        try:
            feed = self.dockerClient.containers.get(feedName)
            status = feed.status == 'running'
        except APIError:
            status = False
        return Response(json.dumps({"status": status}), mimetype='application/json')
def __init__(self, api=None):
    """Initialise the listener and attach a Kafka producer to it.

    Args:
        api: passed unchanged to the parent class initialiser.
    """
    super(MyStreamListener, self).__init__(api)
    # The producer publishes through a client for the broker at localhost:9092.
    self.kafProducer = SimpleProducer(SimpleClient('localhost:9092'))
# -*- coding: utf-8 -*-
from kafka import SimpleClient, SimpleProducer, KafkaConsumer

# SimpleClient expects a list of brokers or a comma-separated string; the
# former space-separated string was treated as a single, invalid host name.
kafka = SimpleClient([
    "192.168.6.51",
    "192.168.6.52",
    "192.168.6.53",
    "192.168.6.54",
    "192.168.6.55",
])
producer = SimpleProducer(kafka)
kafka.close()
r.data_source.data['x'] = range(len(list(df['value'])))[-WINDOW_SIZE:] dots.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:] dots.data_source.data['x'] = range(len(list( df['value'])))[-WINDOW_SIZE:] else: r.data_source.data['y'] = list(df['value']) r.data_source.data['x'] = range(len(list(df['value']))) dots.data_source.data['y'] = list(df['value']) dots.data_source.data['x'] = range(len(list(df['value']))) # A Kafka consumer listens for messages on the 'wave' topic and plots # up-to-date results in a Bokeh plot if __name__ == '__main__': # Initiate connection to Kafka (consumer) and Redis client = SimpleClient('localhost:9092') consumer = SimpleConsumer(client, None, 'wave') # push this plotting session to Bokeh page session = push_session(curdoc()) # dataframe that is updated with all new data df = pd.DataFrame(columns=['time', 'value']) # data vars time, value = [0], [0] # figure that is updated with new data plot = figure() r = plot.line(time, value) dots = plot.circle(time, value, size=1, color='navy')
def simple_client(kafka_broker):
    """Build a ``SimpleClient`` for the given broker.

    The connection string is obtained from ``get_connect_str(kafka_broker)``.
    """
    connect_str = get_connect_str(kafka_broker)
    return SimpleClient(connect_str)
def spoorer(self):
    """Compute consumer lag for whitelisted topics and write it to ``self.log_file``.

    Steps:
      1. Read the topic list from Kafka.
      2. Read every consumer group's committed offsets (and partition owners)
         from ZooKeeper, appending one metric dict per partition to
         ``self.result`` for topics listed in ``self.white_topic_group``.
      3. Ask Kafka for the latest offset ("logsize") of every partition and
         store it in ``self.kafka_logsize``.
      4. Fill in ``logsize`` and ``lag`` (logsize minus offset) for each
         metric and write it as one JSON line to ``self.log_file``.

    Exits the process with status 1 when Kafka or ZooKeeper cannot be
    reached, and with status 2 when the ZooKeeper path has no ``consumers``
    node.
    """
    # Connect to Kafka and fetch the topics.
    try:
        kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
    except Exception:
        print("Error, cannot connect kafka broker.")
        sys.exit(1)
    # The client is only closed once it exists; closing it in a ``finally``
    # attached to the constructor call raised NameError and hid the exit.
    try:
        kafka_topics = kafka_client.topics
    finally:
        kafka_client.close()

    # Connect to ZooKeeper and read the current consumer offsets.
    try:
        zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
        zookeeper_client.start()
    except Exception:
        print("Error, cannot connect zookeeper server.")
        sys.exit(1)

    try:
        try:
            groups = [str(g) for g in zookeeper_client.get_children(self.zookeeper_url + 'consumers')]
        except NoNodeError:
            print("Error, invalid zookeeper url.")
            sys.exit(2)

        for group in groups:
            if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):
                continue
            topic_path = 'consumers/%s/offsets' % (group)
            topics = [str(t) for t in zookeeper_client.get_children(self.zookeeper_url + topic_path)]
            for topic in topics:
                # Only topics present in the whitelist are reported.
                if topic not in self.white_topic_group.keys():
                    continue
                partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                partitions = [int(p) for p in zookeeper_client.get_children(self.zookeeper_url + partition_path)]
                for partition in partitions:
                    owner_path = 'consumers/%s/owners/%s/%s' % (group, topic, partition)
                    offset_path = 'consumers/%s/offsets/%s/%s' % (group, topic, partition)
                    offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]
                    try:
                        owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0]
                    except NoNodeError:
                        # No owner node exists for this partition.
                        owner = 'null'
                    # The consumption progress is kept in the metric dict;
                    # logsize and lag are filled in further below.
                    metric = {'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                              'topic': topic,
                              'group': group,
                              'partition': int(partition),
                              'logsize': None,
                              'offset': int(offset),
                              'lag': None,
                              'owner': owner}
                    self.result.append(metric)
    finally:
        # Runs on success, on error and on the sys.exit(2) path alike.
        zookeeper_client.stop()

    # Fetch the logsize of every partition.
    try:
        client = SimpleClient(self.kafka_hosts)
    except Exception:
        print("Error, cannot connect kafka broker.")
        sys.exit(1)
    try:
        for kafka_topic in kafka_topics:
            self.kafka_logsize[kafka_topic] = {}
            partitions = client.topic_partitions[kafka_topic]
            offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
            offsets_responses = client.send_offset_request(offset_requests)
            for r in offsets_responses:
                self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]
    finally:
        client.close()

    # lag = logsize - current offset
    with open(self.log_file, 'w') as f1:
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric, sort_keys=True) + '\n')
            f1.flush()
class KafkaIntegrationTestCase(unittest.TestCase):
    """Common set-up, tear-down and helpers for broker-backed tests.

    Class attributes:
        create_client: whether ``setUp`` creates (and ``tearDown`` closes)
            ``self.client``.
        topic: topic name; generated by ``setUp`` when left empty.
        zk, server: fixtures read by the methods below; expected to be
            provided by subclasses.
    """
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        """Wait until the test topic is usable, or skip without ``KAFKA_VERSION``.

        Raises:
            KafkaTimeoutError: when metadata or partitions do not become
                available within 30 seconds.
        """
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            # <test method name>-<random suffix>
            self.topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                                    random_string(10))

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                # self.topic, not the local ``topic``: that local was never
                # bound when a subclass preset the topic name, which raised
                # UnboundLocalError.
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError):
                    # Same deadline as the metadata wait above.
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        # Filled lazily by msg().
        self._messages = {}

    def tearDown(self):
        """Close ``self.client`` when ``setUp`` created it."""
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        """Return the first offset of the single offset response for the partition.

        Any exception is re-raised after the zookeeper and broker logs have
        been dumped.
        """
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        """Map ``msg`` over ``iterable`` and return the results as a list."""
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        """Return the cached UTF-8 payload for ``s``, creating it on first use.

        A new payload is built from ``s``, the test id and a random UUID.
        """
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        """Return ``k`` encoded as UTF-8 bytes."""
        return k.encode('utf-8')
class TestFailover(KafkaIntegrationTestCase):
    """Failover tests: producers and consumers must survive the loss of a partition leader.

    Each test runs against its own zookeeper fixture and three broker
    fixtures created in ``setUp``.
    """
    create_client = False

    def setUp(self):
        """Start zookeeper plus three brokers and connect ``self.client`` to all of them."""
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port]
        kk_kwargs = {'zk_chroot': zk_chroot, 'replicas': replicas, 'partitions': partitions}
        self.brokers = [KafkaFixture.instance(i, *kk_args, **kk_kwargs) for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        # The base class waits for the topic using the client created above.
        super(TestFailover, self).setUp()

    def tearDown(self):
        """Close the client, every broker fixture and the zookeeper fixture."""
        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        self.client.close()
        for broker in self.brokers:
            broker.close()
        self.zk.close()

    def test_switch_leader(self):
        """A synchronous producer resumes sending after the partition leader is killed."""
        topic = self.topic
        partition = 0

        # Testing the base Producer class here so that we can easily send
        # messages to a specific partition, kill the leader for that partition
        # and check that after another broker takes leadership the producer
        # is able to resume sending messages

        # require that the server commit messages to all in-sync replicas
        # so that failover doesn't lose any messages on server-side
        # and we can assert that server-side message count equals client-side
        # (``async_send`` replaces ``async``, a reserved word since Python 3.7)
        producer = Producer(self.client, async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
        self._kill_leader(topic, partition)

        # expect failure, but don't wait more than 60 secs to recover
        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                log.debug("attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages to new leader
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
        # Should be equal to 100 before + 1 recovery + 100 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 201, partitions=(partition,),
                                  at_least=True)

    def test_switch_leader_async(self):
        """An asynchronous producer delivers its queue despite a leader change."""
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
        producer = Producer(self.client, async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # Stop the producer and wait for it to shutdown
        producer.stop()
        started = time.time()
        timeout = 60
        while (time.time() - started) < timeout:
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)
        else:
            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 21, partitions=(partition,),
                                  at_least=True)
        self.assert_message_count(topic, 21, partitions=(partition + 1,),
                                  at_least=True)

    def test_switch_leader_keyed_producer(self):
        """A keyed producer can write to partition 0 again after its leader is killed."""
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                # Only a successful send to partition 0 counts as recovery.
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

    def test_switch_leader_simple_consumer(self):
        """``SimpleConsumer.get_messages`` still works after a leader change."""
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client, None, self.topic, partitions=None,
                                  auto_commit=False, iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        """Send ``n`` random messages to ``topic``/``partition``, retrying each until it succeeds."""
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except Exception:
                    # ``except Exception`` rather than a bare ``except`` so
                    # that Ctrl-C / SystemExit can break this retry loop.
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        """Close the broker fixture that currently leads ``topic``/``partition`` and return it."""
        leader = self.client.topics_to_brokers[TopicPartition(topic, partition)]
        # Broker fixtures are stored in a list indexed by node id.
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        """Fail unless ``check_count`` messages are pending on the given partitions.

        Polls a fresh consumer for up to ``timeout`` seconds. With
        ``at_least=True`` more than ``check_count`` messages are accepted.

        Returns:
            True when the check passes.
        """
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                # Transient failure: poll again until the timeout.
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
def simple_client(kafka_broker):
    """Return a ``SimpleClient`` for ``localhost`` on the port of ``kafka_broker``."""
    return SimpleClient('localhost:' + str(kafka_broker.port))
class TestFailover(KafkaIntegrationTestCase):
    """Leader-failover scenarios run against a three-broker cluster fixture."""
    create_client = False

    def setUp(self):
        """Bring up zookeeper and three brokers, then connect a client to them."""
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        broker_count = 3
        fixture_options = dict(zk_chroot=random_string(10),
                               replicas=broker_count,
                               partitions=3)

        # One small zookeeper instance shared by all three brokers.
        self.zk = ZookeeperFixture.instance()
        self.brokers = []
        for broker_id in range(broker_count):
            self.brokers.append(
                KafkaFixture.instance(broker_id, self.zk, **fixture_options))

        self.client = SimpleClient(
            ['%s:%d' % (b.host, b.port) for b in self.brokers], timeout=2)
        super(TestFailover, self).setUp()

    def tearDown(self):
        """Shut down the client, the brokers and zookeeper."""
        super(TestFailover, self).tearDown()
        if os.environ.get('KAFKA_VERSION'):
            self.client.close()
            for fixture in self.brokers:
                fixture.close()
            self.zk.close()

    def test_switch_leader(self):
        """Synchronous producer keeps working once a new leader is elected."""
        topic, partition = self.topic, 0

        # The base Producer lets us target one partition directly, so we can
        # kill exactly that partition's leader and watch the producer recover.
        # Acks are required from all in-sync replicas so that no message is
        # lost server-side and the final count can be compared.
        producer = Producer(self.client,
                            async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # 100 messages before the failure ...
        self._send_random_messages(producer, topic, partition, 100)
        self._kill_leader(topic, partition)

        # ... then retry a marker message for at most 60 seconds.
        retryable = (FailedPayloadsError, KafkaConnectionError,
                     RequestTimedOutError, NotLeaderForPartitionError)
        recovered = False
        began = time.time()
        while not recovered and (time.time() - began) < 60:
            try:
                log.debug(
                    "attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
            except retryable:
                log.debug("caught exception sending message -- will retry")
            else:
                recovered = True

        # The marker message must have gone through.
        self.assertTrue(recovered)

        # ... and 100 more to the new leader.
        self._send_random_messages(producer, topic, partition, 100)

        # 100 before + 1 marker + 100 after; duplicates are tolerated because
        # delivery is at-least-once.
        self.assert_message_count(topic, 201, partitions=(partition, ),
                                  at_least=True)

    def test_switch_leader_async(self):
        """Asynchronous producer flushes its queue across a leader change."""
        topic, partition = self.topic, 0
        neighbour = partition + 1

        # Base Producer again, this time batching in the background.
        producer = Producer(self.client,
                            async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # 10 messages to each of the two partitions.
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, neighbour, 10)

        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # Asynchronous sends are expected to return immediately.
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, neighbour, b'success')

        # 10 more messages each, now handled by the new leader.
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, neighbour, 10)

        # Stop the producer and give its thread up to 60 seconds to finish.
        producer.stop()
        began = time.time()
        while True:
            if not (time.time() - began) < 60:
                self.fail('timeout waiting for producer queue to empty')
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)

        # 10 before + 1 marker + 10 after per partition; at-least-once.
        for checked in (partition, neighbour):
            self.assert_message_count(topic, 21, partitions=(checked, ),
                                      at_least=True)

    def test_switch_leader_keyed_producer(self):
        """Keyed producer reaches partition 0 again after its leader died."""
        topic = self.topic
        producer = KeyedProducer(self.client, async_send=False)

        def send_one():
            """Send one random keyed message and return its key."""
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
            return key

        # Warm-up: 10 random messages.
        for _ in range(10):
            send_one()

        # Take down the leader of partition 0.
        self._kill_leader(topic, 0)

        retryable = (FailedPayloadsError, KafkaConnectionError,
                     RequestTimedOutError, NotLeaderForPartitionError)
        recovered = False
        began = time.time()
        while not recovered and (time.time() - began) < 60:
            try:
                key = send_one()
                # Recovery means a send that was routed to partition 0 worked.
                recovered = producer.partitioners[topic].partition(key) == 0
            except retryable:
                log.debug("caught exception sending message -- will retry")

        # A message must have reached partition 0.
        self.assertTrue(recovered)

        # A few more sends to confirm nothing raises any longer.
        for _ in range(10):
            send_one()

    def test_switch_leader_simple_consumer(self):
        """Consumer can fetch both before and after the leader is killed."""
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client, None, self.topic,
                                  partitions=None,
                                  auto_commit=False,
                                  iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        """Deliver ``n`` random messages, retrying every send until it succeeds."""
        for index in range(n):
            msg = 'msg {0}: {1}'.format(index, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic,
                      partition)
            payload = msg.encode('utf-8')
            delivered = False
            while not delivered:
                try:
                    producer.send_messages(topic, partition, payload)
                except Exception:
                    log.exception(
                        'failure in _send_random_messages - retrying')
                else:
                    delivered = True

    def _kill_leader(self, topic, partition):
        """Close and return the broker fixture leading ``topic``/``partition``."""
        leader = self.client.topics_to_brokers[TopicPartition(topic, partition)]
        victim = self.brokers[leader.nodeId]
        victim.close()
        return victim

    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None,
                             at_least=False):
        """Check the number of pending messages on ``partitions``.

        A dedicated client and consumer poll the pending count until it
        reaches ``check_count`` or ``timeout`` seconds have passed. Fewer
        messages always fail the test; more messages fail it unless
        ``at_least`` is true.

        Returns:
            True when the check passes.
        """
        addresses = []
        for broker in self.brokers:
            addresses.append('%s:%d' % (broker.host, broker.port))

        client = SimpleClient(','.join(addresses), timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        began = time.time()
        pending = -1
        while pending < check_count and (time.time() - began < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                # Keep the previous count and poll again.
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        if pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True