def test_register_event_handler_store_record_avro_serializer(
        get_avro_serializer, get_avro_serializer_store):
    """Registering a StoreRecord handler exposes it via get_events()/get_handlers().

    Both registries are keyed by compiled regex patterns, so lookup is done
    by matching each key against the canonical event name.
    """
    serializer = get_avro_serializer
    local_store, global_store = get_avro_serializer_store

    admin_client = KafkaAdminClient(bootstrap_servers='localhost:9092',
                                    client_id='test_store_builder')
    cluster_metadata = ClusterMetadata(bootstrap_servers='localhost:9092')
    loop = asyncio.get_event_loop()
    store_builder = StoreBuilder('test_store_builder', 0, 1, 'test-store',
                                 serializer, local_store, global_store,
                                 'localhost:9092', cluster_metadata,
                                 admin_client, loop, False, False)
    store_record_handler = StoreRecordHandler(store_builder)
    serializer.register_event_handler_store_record(StoreRecord,
                                                   store_record_handler)

    def _first_match(registry):
        # Return the first value whose regex key matches the event name,
        # or None when nothing matches (which will fail the asserts below).
        for pattern, value in registry.items():
            if pattern.match('tonga.store.record'):
                return value
        return None

    assert _first_match(serializer.get_events()) == StoreRecord
    assert _first_match(serializer.get_handlers()) == store_record_handler
def __init__(self, *, loop, bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             metadata_max_age_ms=300000, request_timeout_ms=40000,
             retry_backoff_ms=100, ssl_context=None,
             security_protocol='PLAINTEXT', api_version='auto',
             connections_max_idle_ms=540000):
    """Initialize the asynchronous Kafka client.

    All arguments are keyword-only.  ``loop`` is the asyncio event loop the
    client runs on; the remaining options mirror the usual Kafka client
    settings (bootstrap servers, client id, timeouts, SSL configuration).

    Raises:
        ValueError: if ``security_protocol`` is not 'SSL' or 'PLAINTEXT',
            or if 'SSL' is requested without an ``ssl_context``.
    """
    # Validate security configuration up front, before any state is set.
    if security_protocol not in ('SSL', 'PLAINTEXT'):
        raise ValueError("`security_protocol` should be SSL or PLAINTEXT")
    if security_protocol == "SSL" and ssl_context is None:
        raise ValueError(
            "`ssl_context` is mandatory if security_protocol=='SSL'")

    self._bootstrap_servers = bootstrap_servers
    self._client_id = client_id
    self._metadata_max_age_ms = metadata_max_age_ms
    self._request_timeout_ms = request_timeout_ms
    self._api_version = api_version
    self._security_protocol = security_protocol
    self._ssl_context = ssl_context
    # Stored in seconds; the constructor argument is in milliseconds.
    self._retry_backoff = retry_backoff_ms / 1000
    self._connections_max_idle_ms = connections_max_idle_ms

    self.cluster = ClusterMetadata(metadata_max_age_ms=metadata_max_age_ms)
    self._topics = set()  # empty set will fetch all topic metadata
    self._conns = {}
    self._loop = loop
    self._sync_task = None

    # Futures coordinating on-demand metadata refreshes; the waiter is
    # created eagerly so callers can always await it.
    self._md_update_fut = None
    self._md_update_waiter = create_future(loop=self._loop)
    self._get_conn_lock = asyncio.Lock(loop=loop)
def fetch_all_metadata(self):
    """Fetch fresh metadata for all topics into a new ClusterMetadata.

    Builds and returns a standalone ClusterMetadata object instead of
    mutating ``self.cluster``.

    Raises:
        KafkaError: if the update could not be served by any known broker.
    """
    cluster_md = ClusterMetadata(
        metadata_max_age_ms=self._metadata_max_age_ms)
    # Old-style coroutine: delegate the actual refresh to the shared
    # metadata-update helper; empty topic list means "all topics".
    updated = yield from self._metadata_update(cluster_md, [])
    if not updated:
        raise KafkaError(
            'Unable to get cluster metadata over all known brokers')
    return cluster_md
def __init__(self, **configs):
    """Build the client from DEFAULT_CONFIG overridden by ``**configs``.

    Only keys already present in DEFAULT_CONFIG are honoured; unknown
    keys in ``configs`` are silently ignored.
    """
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs[key]

    # these properties need to be set on top of the initialization pipeline
    # because they are used when __del__ method is called
    self._closed = False
    self._wake_r, self._wake_w = socket.socketpair()
    self._selector = self.config["selector"]()

    self.cluster = ClusterMetadata(**self.config)
    self._topics = set()  # empty set will fetch all topic metadata
    self._metadata_refresh_in_progress = False
    self._conns = Dict()  # object to support weakrefs
    self._api_versions = None
    self._connecting = set()
    self._sending = set()
    self._refresh_on_disconnects = True
    self._last_bootstrap = 0
    self._bootstrap_fails = 0

    # Wake-up socket pair: reads are non-blocking; writes get a timeout so
    # a stalled reader cannot block a writer indefinitely.
    self._wake_r.setblocking(False)
    self._wake_w.settimeout(self.config["wakeup_timeout_ms"] / 1000.0)
    self._wake_lock = threading.Lock()

    self._lock = threading.RLock()

    # when requests complete, they are transferred to this queue prior to
    # invocation. The purpose is to avoid invoking them while holding the
    # lock above.
    self._pending_completion = collections.deque()

    self._selector.register(self._wake_r, selectors.EVENT_READ)
    self._idle_expiry_manager = IdleConnectionManager(
        self.config["connections_max_idle_ms"])

    # Metrics are optional; only wired up when a registry is supplied.
    self._sensors = None
    if self.config["metrics"]:
        self._sensors = KafkaClientMetrics(
            self.config["metrics"],
            self.config["metric_group_prefix"],
            weakref.proxy(self._conns),
        )

    self._num_bootstrap_hosts = len(
        collect_hosts(self.config["bootstrap_servers"]))

    # Check Broker Version if not set explicitly
    if self.config["api_version"] is None:
        check_timeout = self.config["api_version_auto_timeout_ms"] / 1000
        self.config["api_version"] = self.check_version(
            timeout=check_timeout)
def test_empty_broker_list():
    """update_metadata() must ignore a response carrying no brokers."""
    cluster = ClusterMetadata()
    assert len(cluster.brokers()) == 0

    two_brokers = MetadataResponse[0]([(0, 'foo', 12), (1, 'bar', 34)], [])
    cluster.update_metadata(two_brokers)
    assert len(cluster.brokers()) == 2

    # empty broker list response should be ignored
    no_brokers = MetadataResponse[0](
        [],  # empty brokers
        [(17, 'foo', []), (17, 'bar', [])])  # topics w/ error
    cluster.update_metadata(no_brokers)
    assert len(cluster.brokers()) == 2
def __init__(self, **configs):
    """Build the client from DEFAULT_CONFIG overridden by ``**configs``.

    Only keys already present in DEFAULT_CONFIG are honoured; unknown
    keys are silently ignored.
    """
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs[key]

    # An explicitly supplied api_version must be one of the known tuples.
    if self.config['api_version'] is not None:
        assert self.config['api_version'] in self.API_VERSIONS, (
            'api_version [{0}] must be one of: {1}'.format(
                self.config['api_version'], str(self.API_VERSIONS)))

    self.cluster = ClusterMetadata(**self.config)
    self._topics = set()  # empty set will fetch all topic metadata
    self._metadata_refresh_in_progress = False
    self._selector = self.config['selector']()
    self._conns = Dict()  # object to support weakrefs
    self._connecting = set()
    self._refresh_on_disconnects = True
    self._last_bootstrap = 0
    self._bootstrap_fails = 0

    # Socket pair used to wake the selector from other threads.
    self._wake_r, self._wake_w = socket.socketpair()
    self._wake_r.setblocking(False)
    self._wake_lock = threading.Lock()

    self._lock = threading.RLock()

    # when requests complete, they are transferred to this queue prior to
    # invocation. The purpose is to avoid invoking them while holding the
    # lock above.
    self._pending_completion = collections.deque()

    self._selector.register(self._wake_r, selectors.EVENT_READ)
    self._idle_expiry_manager = IdleConnectionManager(
        self.config['connections_max_idle_ms'])
    self._closed = False
    self._sensors = None
    if self.config['metrics']:
        self._sensors = KafkaClientMetrics(
            self.config['metrics'], self.config['metric_group_prefix'],
            weakref.proxy(self._conns))

    # Eagerly connect to the configured bootstrap brokers.
    self._bootstrap(collect_hosts(self.config['bootstrap_servers']))

    # Check Broker Version if not set explicitly
    if self.config['api_version'] is None:
        check_timeout = self.config['api_version_auto_timeout_ms'] / 1000
        self.config['api_version'] = self.check_version(
            timeout=check_timeout)
def __init__(self, client_id: str,
             bootstrap_servers: Union[str, List[str]] = None,
             **kwargs) -> None:
    """KafkaClient constructor.

    Args:
        client_id (str): A name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client.
        bootstrap_servers (Union[str, List[str]]): 'host[:port]' string
            (or list of 'host[:port]' strings) that the producer should
            contact to bootstrap initial cluster metadata. This does not
            have to be the full node list. It just needs to have at least
            one broker that will respond to a Metadata API Request.
            Default port is 9092. If no servers are specified, will
            default to localhost:9092.
        **kwargs: forwarded to ``super().__init__``; expected to carry
            ``cur_instance`` (current service instance) and ``nb_replica``
            (number of service replicas).

    Raises:
        BadArgumentKafkaClient: raised if arguments are not valid
        CurrentInstanceOutOfRange: raised if the current instance is
            higher than the replica number
        KafkaAdminConfigurationError: raised if KafkaAdminClient
            arguments are not valid
        KafkaClientConnectionErrors: raised if the client can't connect
            to Kafka or no brokers are available
    """
    super().__init__(**kwargs)
    # Default to a local broker when no server list is supplied.
    if bootstrap_servers is None:
        self.bootstrap_servers = 'localhost:9092'
    else:
        self.bootstrap_servers = bootstrap_servers
    if isinstance(client_id, str):
        self.client_id = client_id
    else:
        raise BadArgumentKafkaClient
    try:
        # NOTE(review): ``self.cur_instance`` is never assigned in this
        # method — presumably set by super().__init__ from kwargs; confirm.
        self._kafka_admin_client = KafkaAdminClient(
            bootstrap_servers=self.bootstrap_servers,
            client_id=f'waiter-{self.cur_instance}')
        self._cluster_metadata = ClusterMetadata(
            bootstrap_servers=self.bootstrap_servers)
    except KafkaConfigurationError:
        raise KafkaAdminConfigurationError
    except (KafkaConnectionError, NoBrokersAvailable):
        raise KafkaClientConnectionErrors
def __init__(self, *, loop, bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             metadata_max_age_ms=300000, request_timeout_ms=40000,
             api_version='auto'):
    """Initialize an asynchronous kafka client

    Keyword Arguments:
        bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
            strings) that the consumer should contact to bootstrap
            initial cluster metadata. This does not have to be the full
            node list. It just needs to have at least one broker that
            will respond to Metadata API Request. Default port is 9092.
            If no servers are specified, will default to localhost:9092.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client. Also
            submitted to GroupCoordinator for logging with respect to
            consumer group administration. Default: 'aiokafka-{ver}'
        request_timeout_ms (int): Client request timeout in milliseconds.
            Default: 40000.
        metadata_max_age_ms (int): The period of time in milliseconds
            after which we force a refresh of metadata even if we haven't
            seen any partition leadership changes to proactively discover
            any new brokers or partitions. Default: 300000
        api_version (str): specify which kafka API version to use.
            AIOKafka supports Kafka API versions >=0.9 only. If set to
            'auto', will attempt to infer the broker version by probing
            various APIs. Default: auto
    """
    self._bootstrap_servers = bootstrap_servers
    self._client_id = client_id
    self._metadata_max_age_ms = metadata_max_age_ms
    self._request_timeout_ms = request_timeout_ms
    self._api_version = api_version

    self.cluster = ClusterMetadata(metadata_max_age_ms=metadata_max_age_ms)
    self._topics = set()  # empty set will fetch all topic metadata
    self._conns = {}
    self._loop = loop
    self._sync_task = None

    # Futures used to coordinate metadata refreshes between coroutines.
    self._md_update_fut = asyncio.Future(loop=self._loop)
    self._md_update_waiter = asyncio.Future(loop=self._loop)
    self._get_conn_lock = asyncio.Lock(loop=loop)
async def test_add_batch_builder(self):
    """add_batch() queues at most one batch per TopicPartition until drained."""
    tp0 = TopicPartition("test-topic", 0)
    tp1 = TopicPartition("test-topic", 1)

    def mocked_leader_for_partition(tp):
        # tp0 -> node 0, tp1 -> node 1, anything else has no leader.
        if tp == tp0:
            return 0
        if tp == tp1:
            return 1
        return None

    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition
    ma = MessageAccumulator(cluster, 1000, 0, 1, loop=self.loop)

    builder0 = ma.create_builder()
    builder1_1 = ma.create_builder()
    builder1_2 = ma.create_builder()

    # batches may queued one-per-TP
    self.assertFalse(ma._wait_data_future.done())
    await ma.add_batch(builder0, tp0, 1)
    self.assertTrue(ma._wait_data_future.done())
    self.assertEqual(len(ma._batches[tp0]), 1)

    await ma.add_batch(builder1_1, tp1, 1)
    self.assertEqual(len(ma._batches[tp1]), 1)
    # A second batch for the same TP must time out while the first waits.
    with self.assertRaises(KafkaTimeoutError):
        await ma.add_batch(builder1_2, tp1, 0.1)
    self.assertTrue(ma._wait_data_future.done())
    self.assertEqual(len(ma._batches[tp1]), 1)

    # second batch gets added once the others are cleared out
    self.loop.call_later(0.1, ma.drain_by_nodes, [])
    await ma.add_batch(builder1_2, tp1, 1)
    self.assertTrue(ma._wait_data_future.done())
    self.assertEqual(len(ma._batches[tp0]), 0)
    self.assertEqual(len(ma._batches[tp1]), 1)
from kafka.client import KafkaClient
from kafka.cluster import ClusterMetadata

# Bootstrap a client and force an immediate metadata refresh.
client = KafkaClient(bootstrap_servers='localhost:9092',
                     client_id='test_store_builder')
# poll() drives network I/O until the requested metadata update completes
# and returns the responses received meanwhile.
response_metadata = client.poll(future=client.cluster.request_update())

# Load the fetched metadata into a standalone ClusterMetadata instance.
cluster_metadata = ClusterMetadata(bootstrap_servers='localhost:9092')
# NOTE(review): assumes poll() returned at least one response and that the
# first one is the metadata response — confirm against the client version.
cluster_metadata.update_metadata(response_metadata[0])
cluster_metadata.partitions_for_topic('test-assignor')
def client(mocker):
    """Fixture: KafkaClient mock whose ``cluster`` is a ClusterMetadata mock."""
    spec_client = KafkaClient(bootstrap_servers=[])
    mocked = mocker.Mock(spec=spec_client)
    mocked.cluster = mocker.Mock(spec=ClusterMetadata())
    return mocked
async def test_batch_pending_batch_list(self):
    # In message accumulator we have _pending_batches list, that stores
    # batches when those are delivered to node. We must be sure we never
    # lose a batch during retries and that we don't produce duplicate batch
    # links in the process
    tp0 = TopicPartition("test-topic", 0)

    def mocked_leader_for_partition(tp):
        # Only tp0 has a leader (node 0).
        if tp == tp0:
            return 0
        return None

    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition
    ma = MessageAccumulator(cluster, 1000, 0, 1)
    fut1 = await ma.add_message(tp0, b'key', b'value', timeout=2)

    # Drain and Reenqueu
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    batch = batches[0][tp0]
    self.assertIn(batch, ma._pending_batches)
    self.assertFalse(ma._batches)
    self.assertFalse(fut1.done())

    ma.reenqueue(batch)
    self.assertEqual(batch.retry_count, 1)
    self.assertFalse(ma._pending_batches)
    self.assertIn(batch, ma._batches[tp0])
    self.assertFalse(fut1.done())

    # Drain and Reenqueu again. We check for repeated call
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[0][tp0], batch)
    self.assertEqual(batch.retry_count, 2)
    self.assertIn(batch, ma._pending_batches)
    self.assertFalse(ma._batches)
    self.assertFalse(fut1.done())

    # retry_count is NOT bumped by reenqueue itself here — it stays at 2.
    ma.reenqueue(batch)
    self.assertEqual(batch.retry_count, 2)
    self.assertFalse(ma._pending_batches)
    self.assertIn(batch, ma._batches[tp0])
    self.assertFalse(fut1.done())

    # Drain and mark as done. Check that no link to batch remained
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[0][tp0], batch)
    self.assertEqual(batch.retry_count, 3)
    self.assertIn(batch, ma._pending_batches)
    self.assertFalse(ma._batches)
    self.assertFalse(fut1.done())

    if hasattr(batch.future, "_callbacks"):  # Vanilla asyncio
        self.assertEqual(len(batch.future._callbacks), 1)
    batch.done_noack()
    await asyncio.sleep(0.01)
    self.assertEqual(batch.retry_count, 3)
    self.assertFalse(ma._pending_batches)
    self.assertFalse(ma._batches)
def client(mocker):
    """Fixture: mocked KafkaClient (api 0.9) with a mocked ClusterMetadata."""
    spec_client = KafkaClient(bootstrap_servers=(), api_version=(0, 9))
    mocked = mocker.Mock(spec=spec_client)
    mocked.cluster = mocker.Mock(spec=ClusterMetadata())
    return mocked
import signal
import pprint

from kafka.admin import KafkaAdminClient, NewTopic, ConfigResourceType, ConfigResource
from kafka.cluster import ClusterMetadata

from config import bootstrap_servers, ssl_cafile, ssl_certfile, ssl_keyfile

# Admin connection secured with mutual TLS (CA, client cert, client key).
adminClient = KafkaAdminClient(bootstrap_servers=bootstrap_servers,
                               security_protocol="SSL",
                               ssl_cafile=ssl_cafile,
                               ssl_certfile=ssl_certfile,
                               ssl_keyfile=ssl_keyfile)
clusterMetadata = ClusterMetadata(bootstrap_servers=bootstrap_servers, )


def createTopic():
    """Interactively prompt for a topic name and partition count, then
    create the topic (replication factor 1) after a 'Y' confirmation."""
    try:
        topic = str(input("Please enter a topic name:")).strip()
        # Empty input falls back to 1 partition via the `or 1` default.
        topic_partitions = int(
            input("Please enter a number of partition [1]:") or 1)
        if str(
                input(
                    "Confirm the creation of topic '{}' with {} partitions? [Y/N]"
                    .format(topic, topic_partitions))) == 'Y':
            print("Creating topic {}...".format(topic))
            newTopic_list = [
                NewTopic(name=topic,
                         num_partitions=topic_partitions,
                         replication_factor=1)
            ]
            adminClient.create_topics(newTopic_list)
            # NOTE(review): the except clause of this try block lies beyond
            # this chunk of the file.
def test_batch_done(self):
    """Completing drained batches resolves their message futures correctly."""
    tp0 = TopicPartition("test-topic", 0)
    tp1 = TopicPartition("test-topic", 1)
    tp2 = TopicPartition("test-topic", 2)
    tp3 = TopicPartition("test-topic", 3)

    def mocked_leader_for_partition(tp):
        # tp2's leader is "unavailable" (-1); tp3 has no leader at all.
        if tp == tp0:
            return 0
        if tp == tp1:
            return 1
        if tp == tp2:
            return -1
        return None

    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition
    ma = MessageAccumulator(cluster, 1000, None, 1, self.loop)

    fut1 = yield from ma.add_message(
        tp2, None, b'msg for tp@2', timeout=2)
    fut2 = yield from ma.add_message(
        tp3, None, b'msg for tp@3', timeout=2)
    yield from ma.add_message(tp1, None, b'0123456789'*70, timeout=2)
    # The tp1 batch is full, so the next add must time out.
    with self.assertRaises(KafkaTimeoutError):
        yield from ma.add_message(tp1, None, b'0123456789'*70, timeout=2)

    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[1][tp1].expired(), True)
    with self.assertRaises(LeaderNotAvailableError):
        yield from fut1
    with self.assertRaises(NotLeaderForPartitionError):
        yield from fut2

    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#0', timeout=2)
    fut02 = yield from ma.add_message(
        tp0, b'key1', b'value#1', timeout=2)
    fut10 = yield from ma.add_message(
        tp1, None, b'0123456789'*70, timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[0][tp0].expired(), False)
    self.assertEqual(batches[1][tp1].expired(), False)
    batch_data = batches[0][tp0].get_data_buffer()
    self.assertEqual(type(batch_data), io.BytesIO)
    batches[0][tp0].done(base_offset=10)

    class TestException(Exception):
        pass

    batches[1][tp1].done(exception=TestException())

    # Successful batch completion assigns sequential offsets from the base.
    res = yield from fut01
    self.assertEqual(res.topic, "test-topic")
    self.assertEqual(res.partition, 0)
    self.assertEqual(res.offset, 10)
    res = yield from fut02
    self.assertEqual(res.topic, "test-topic")
    self.assertEqual(res.partition, 0)
    self.assertEqual(res.offset, 11)
    with self.assertRaises(TestException):
        yield from fut10

    # done(base_offset=None) resolves the future with None.
    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#0', timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    batches[0][tp0].done(base_offset=None)
    res = yield from fut01
    self.assertEqual(res, None)

    # cancelling future
    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#2', timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    fut01.cancel()
    batches[0][tp0].done(base_offset=21)  # no error in this case
def test_basic(self):
    """End-to-end sanity of MessageAccumulator: waiter, drain, overflow."""
    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    ma = MessageAccumulator(cluster, 1000, None, 30, self.loop)
    data_waiter = ma.data_waiter()
    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))  # no data in accumulator yet...

    tp0 = TopicPartition("test-topic", 0)
    tp1 = TopicPartition("test-topic", 1)
    yield from ma.add_message(tp0, b'key', b'value', timeout=2)
    yield from ma.add_message(tp1, None, b'value without key', timeout=2)

    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertTrue(bool(done))

    # Without leader information nothing can be drained yet.
    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches, {})
    self.assertEqual(unknown_leaders_exist, True)

    def mocked_leader_for_partition(tp):
        # tp0 -> node 0, tp1 -> node 1, everything else -> -1 (unknown).
        if tp == tp0:
            return 0
        if tp == tp1:
            return 1
        return -1

    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(len(batches), 2)
    self.assertEqual(unknown_leaders_exist, False)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(type(m_set0), MessageBatch)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(type(m_set1), MessageBatch)
    self.assertEqual(m_set0.expired(), False)

    data_waiter = ensure_future(ma.data_waiter(), loop=self.loop)
    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))  # no data in accumulator again...

    # testing batch overflow
    tp2 = TopicPartition("test-topic", 2)
    yield from ma.add_message(
        tp0, None, b'some short message', timeout=2)
    yield from ma.add_message(
        tp0, None, b'some other short message', timeout=2)
    yield from ma.add_message(
        tp1, None, b'0123456789' * 70, timeout=2)
    yield from ma.add_message(
        tp2, None, b'message to unknown leader', timeout=2)
    # next we try to add message with len=500,
    # as we have buffer_size=1000 coroutine will block until data will be
    # drained
    add_task = ensure_future(
        ma.add_message(tp1, None, b'0123456789' * 50, timeout=2),
        loop=self.loop)
    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[1, 2])
    self.assertEqual(unknown_leaders_exist, True)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(m_set0._builder._relative_offset, 2)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1, None)

    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.1, loop=self.loop)
    self.assertFalse(bool(done))  # we stil not drained data for tp1

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(unknown_leaders_exist, True)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(m_set0, None)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1._builder._relative_offset, 1)

    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.2, loop=self.loop)
    self.assertTrue(bool(done))

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(unknown_leaders_exist, True)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1._builder._relative_offset, 1)
# For URL handling
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2 fallback
import socket
# For encryption
# from Crypto.PublicKey import RSA
# from Crypto import Random

# NOTE: network call to an external service to discover the public IP.
myIP = urlopen('http://ip.42.pl/raw').read()
myPrivateIP = socket.gethostbyname(socket.gethostname())
print("myPrivateIP", myPrivateIP)

producer = KafkaProducer(
    bootstrap_servers=[myPrivateIP + ':9090', myPrivateIP + ':9091'],
    api_version=(0, 10))
# BUGFIX: ClusterMetadata has no 'brokers' config key — passing
# brokers=producer was silently ignored (only DEFAULT_CONFIG keys are
# copied), leaving the metadata object with no configured servers.
# Pass the broker list via the supported 'bootstrap_servers' option.
clusterMetadata = ClusterMetadata(
    bootstrap_servers=[myPrivateIP + ':9090', myPrivateIP + ':9091'])
print("Metadata", clusterMetadata)
print("All brokers Metadata", clusterMetadata.brokers())
print("Broker 0 Metadata", clusterMetadata.broker_metadata(0))
print("Broker 1 Metadata", clusterMetadata.broker_metadata(1))
print("Known Topics Metadata", clusterMetadata.topics())

# Assignment of arguments
try:
    topic = argv[1]
    print("Topic found as arg ", topic)
except IndexError:
    print(IndexError)
    quit()

# Creation of message structure