def __init__(self, kafka_brokers, security_protocol="", sasl_mechanism="",
             sasl_plain_username="", sasl_plain_password="", ssl_context="",
             timeout=5000):
    """Create a consumer-lag helper wrapping a KafkaClient.

    Args:
        kafka_brokers: bootstrap server string/list passed to KafkaClient.
        security_protocol: if non-empty, the SASL/SSL options below are
            forwarded to the client; otherwise a plaintext client is built.
        sasl_mechanism / sasl_plain_username / sasl_plain_password: SASL
            credentials (only used when security_protocol is set).
        ssl_context: SSL context (only used when security_protocol is set).
        timeout: client timeout in milliseconds.
    """
    self.kafka_brokers = kafka_brokers
    self.security_protocol = security_protocol
    self.sasl_mechanism = sasl_mechanism
    self.sasl_plain_username = sasl_plain_username
    self.sasl_plain_password = sasl_plain_password
    self.ssl_context = ssl_context
    self.timeout = timeout

    # Build the constructor kwargs once instead of duplicating the whole
    # KafkaClient(...) call in both branches; the security options are only
    # passed when a protocol was actually configured, exactly as before.
    client_kwargs = {'bootstrap_servers': kafka_brokers, 'timeout': timeout}
    if security_protocol:
        client_kwargs.update(
            security_protocol=security_protocol,
            sasl_mechanism=sasl_mechanism,
            sasl_plain_username=sasl_plain_username,
            sasl_plain_password=sasl_plain_password,
            ssl_context=ssl_context,
        )
    self.client = KafkaClient(**client_kwargs)

    # Accumulators filled in by the lag check.
    self.lag_topics_found = []
    self.lag_total = 0
def test_bootstrap_servers(mocker, bootstrap, expected_hosts):
    """The parsed bootstrap host list must reach _bootstrap."""
    mocker.patch.object(KafkaClient, '_bootstrap')
    kwargs = {} if bootstrap is None else {'bootstrap_servers': bootstrap}
    KafkaClient(**kwargs)

    # host order is randomized internally, so resort before testing
    (hosts,), _ = KafkaClient._bootstrap.call_args  # pylint: disable=no-member
    assert sorted(hosts) == sorted(expected_hosts)
def test_send(conn):
    """send() must reject unknown/disconnected nodes and route requests."""
    cli = KafkaClient()

    # Send to unknown node => raises AssertionError
    try:
        cli.send(2, None)
        assert False, 'Exception not raised'
    except AssertionError:
        pass

    # Send to disconnected node => NodeNotReady
    conn.state = ConnectionStates.DISCONNECTED
    f = cli.send(0, None)
    assert f.failed()
    assert isinstance(f.exception, Errors.NodeNotReadyError)

    conn.state = ConnectionStates.CONNECTED
    cli._maybe_connect(0)

    # ProduceRequest w/ 0 required_acks -> no response
    request = ProduceRequest[0](0, 0, [])
    ret = cli.send(0, request)
    # BUG FIX: `assert conn.send.called_with(...)` is not a Mock assertion --
    # accessing `.called_with` just creates a new (truthy) child mock, so the
    # original checks could never fail.  Use assert_called_with() instead.
    conn.send.assert_called_with(request, expect_response=False)
    assert isinstance(ret, Future)

    request = MetadataRequest[0]([])
    cli.send(0, request)
    conn.send.assert_called_with(request, expect_response=True)
def test_finish_connect(conn):
    """_finish_connect must track _connecting and flag metadata refreshes."""
    client = KafkaClient()

    # A node id that is not in metadata is a programming error.
    try:
        client._initiate_connect(2)
    except AssertionError:
        pass
    else:
        assert False, 'Exception not raised'
    assert 0 not in client._conns

    client._initiate_connect(0)

    # Still connecting: the node remains in the _connecting set.
    conn.connect.return_value = ConnectionStates.CONNECTING
    state = client._finish_connect(0)
    assert state is ConnectionStates.CONNECTING
    assert 0 in client._connecting

    # Connected: the node leaves the _connecting set.
    conn.connect.return_value = ConnectionStates.CONNECTED
    state = client._finish_connect(0)
    assert state is ConnectionStates.CONNECTED
    assert 0 not in client._connecting

    # Failure to connect should trigger metadata update
    assert not client.cluster._need_update
    client._connecting.add(0)
    conn.connect.return_value = ConnectionStates.DISCONNECTED
    state = client._finish_connect(0)
    assert state is ConnectionStates.DISCONNECTED
    assert 0 not in client._connecting
    assert client.cluster._need_update
def test_is_ready(mocker, conn):
    """is_ready() is gated on metadata state and connection capacity."""
    client = KafkaClient()
    client._maybe_connect(0)
    client._maybe_connect(1)

    # An in-progress metadata refresh blocks otherwise-ready nodes.
    assert client.is_ready(0)
    assert client.is_ready(1)
    client._metadata_refresh_in_progress = True
    assert not client.is_ready(0)
    assert not client.is_ready(1)

    # A pending metadata update request also blocks ready nodes.
    client._metadata_refresh_in_progress = False
    assert client.is_ready(0)
    assert client.is_ready(1)
    client.cluster.request_update()
    client.cluster.config['retry_backoff_ms'] = 0
    assert not client._metadata_refresh_in_progress
    assert not client.is_ready(0)
    assert not client.is_ready(1)
    client.cluster._need_update = False

    # If the connection cannot accept more requests, the node is not ready.
    assert client.is_ready(0)
    conn.can_send_more.return_value = False
    assert not client.is_ready(0)
    conn.can_send_more.return_value = True

    # Disconnected nodes are never ready.
    assert client.is_ready(0)
    conn.state = ConnectionStates.DISCONNECTED
    assert not client.is_ready(0)
def _create_kafka_client(self):
    """Build and bootstrap a KafkaClient from this check's instance config.

    Returns:
        A KafkaClient whose local metadata cache has been force-populated.

    Raises:
        ConfigurationError: if kafka_connect_str is not a string or list.
        RuntimeError: if the metadata cache did not populate on bootstrap.
    """
    kafka_conn_str = self.instance.get('kafka_connect_str')
    if not isinstance(kafka_conn_str, (string_types, list)):
        raise ConfigurationError('kafka_connect_str should be string or list of strings')
    kafka_version = self.instance.get('kafka_client_api_version')
    # CONSISTENCY FIX: use string_types as the kafka_connect_str check above
    # does -- a py2 `unicode` version string was previously left unparsed
    # (isinstance(..., str) is False for unicode on py2).
    if isinstance(kafka_version, string_types):
        kafka_version = tuple(map(int, kafka_version.split(".")))
    kafka_client = KafkaClient(
        bootstrap_servers=kafka_conn_str,
        client_id='dd-agent',
        request_timeout_ms=self.init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
        # if `kafka_client_api_version` is not set, then kafka-python automatically probes the cluster for broker
        # version during the bootstrapping process. Note that probing randomly picks a broker to probe, so in a
        # mixed-version cluster probing returns a non-deterministic result.
        api_version=kafka_version,
        # While we check for SSL params, if not present they will default to the kafka-python values for plaintext
        # connections
        security_protocol=self.instance.get('security_protocol', 'PLAINTEXT'),
        sasl_mechanism=self.instance.get('sasl_mechanism'),
        sasl_plain_username=self.instance.get('sasl_plain_username'),
        sasl_plain_password=self.instance.get('sasl_plain_password'),
        sasl_kerberos_service_name=self.instance.get('sasl_kerberos_service_name', 'kafka'),
        sasl_kerberos_domain_name=self.instance.get('sasl_kerberos_domain_name'),
        ssl_cafile=self.instance.get('ssl_cafile'),
        ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
        ssl_certfile=self.instance.get('ssl_certfile'),
        ssl_keyfile=self.instance.get('ssl_keyfile'),
        ssl_crlfile=self.instance.get('ssl_crlfile'),
        ssl_password=self.instance.get('ssl_password'),
    )
    # Force initial population of the local cluster metadata cache
    kafka_client.poll(future=kafka_client.cluster.request_update())
    if kafka_client.cluster.topics(exclude_internal_topics=False) is None:
        raise RuntimeError("Local cluster metadata cache did not populate.")
    return kafka_client
def test_poll(mocker):
    """poll() must pick the smallest applicable timeout for _poll()."""
    metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    ifrs = mocker.patch.object(KafkaClient, 'in_flight_request_count')
    ifrs.return_value = 1
    client = KafkaClient(api_version=(0, 9))

    # metadata timeout wins
    metadata.return_value = 1000
    client.poll()
    _poll.assert_called_with(1.0)

    # user timeout wins
    client.poll(250)
    _poll.assert_called_with(0.25)

    # default is request_timeout_ms
    metadata.return_value = 1000000
    client.poll()
    _poll.assert_called_with(client.config['request_timeout_ms'] / 1000.0)

    # If no in-flight-requests, drop timeout to retry_backoff_ms
    ifrs.return_value = 0
    client.poll()
    _poll.assert_called_with(client.config['retry_backoff_ms'] / 1000.0)
def test_poll(mocker):
    """poll() timeout is min(metadata, delayed-task, user, request) timeouts."""
    mocker.patch.object(KafkaClient, '_bootstrap')
    metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    client = KafkaClient()
    tasks = mocker.patch.object(client._delayed_tasks, 'next_at')

    # metadata timeout wins
    metadata.return_value = 1000
    tasks.return_value = 2
    client.poll()
    _poll.assert_called_with(1.0, sleep=True)

    # user timeout wins
    client.poll(250)
    _poll.assert_called_with(0.25, sleep=True)

    # tasks timeout wins
    tasks.return_value = 0
    client.poll(250)
    _poll.assert_called_with(0, sleep=True)

    # default is request_timeout_ms
    metadata.return_value = 1000000
    tasks.return_value = 10000
    client.poll()
    _poll.assert_called_with(client.config['request_timeout_ms'] / 1000.0,
                             sleep=True)
def send_task(logger, zk_cli):
    """Fetch the next task from ZooKeeper and publish it to Kafka.

    Args:
        logger: logger used for diagnostics.
        zk_cli: connected ZooKeeper client.

    Returns:
        The task dict that was published.
    """
    # get next task
    task = tasks.next_task(zk_cli, TASK_DEFINITION_PATH)
    topic = JobSourceTopic()

    # get postfix
    val, stats = zk_cli.get(KAFKA_POSTFIX_PATH)
    producer = topic.init_producer()
    producer.change_postfix(val)

    kfk_cli = KafkaClient(bootstrap_servers=settings.KAFKA_SERVER)

    # current topics (error_code, topic, is_internal, partitions)
    is_fetch_metadata_success = False
    try:
        topics_res = kafka_utils.get_metadata(kfk_cli, 10000)
        if topics_res is not None:
            topics = (x[1] for x in topics_res.topics)
            # Lazy %-style args instead of eager string interpolation.
            logger.debug("Current topics in kafka: %s", str(list(topics)))
            is_fetch_metadata_success = True
    except Exception:
        # Metadata fetch is diagnostic only -- keep the best-effort behavior,
        # but record the traceback instead of silently swallowing it.
        logger.exception("Error while fetching metadata from kafka")
    if not is_fetch_metadata_success:
        # logger.warn() is a deprecated alias for warning().
        logger.warning("Failed to fetch metadata from kafka")

    producer.produce(task)
    logger.info('sent to Topic %s, feed=%2d. %s',
                producer._topic_name, task['order'], task['name'])
    return task
def test_maybe_auto_commit_offsets_sync(mocker, api_version, group_id, enable,
                                        error, has_auto_commit, commit_offsets,
                                        warn, exc):
    """Auto-commit-on-close commits and logs according to configuration."""
    warn_log = mocker.patch('kafka.coordinator.consumer.log.warning')
    exc_log = mocker.patch('kafka.coordinator.consumer.log.exception')
    coordinator = ConsumerCoordinator(KafkaClient(), SubscriptionState(),
                                      Metrics(), 'consumer',
                                      api_version=api_version,
                                      enable_auto_commit=enable,
                                      group_id=group_id)
    commit_sync = mocker.patch.object(coordinator, 'commit_offsets_sync',
                                      side_effect=error)

    # The auto-commit task only exists when auto-commit is possible.
    if has_auto_commit:
        assert coordinator._auto_commit_task is not None
        coordinator._auto_commit_task.enable()
        assert coordinator._auto_commit_task._enabled is True
    else:
        assert coordinator._auto_commit_task is None

    assert coordinator._maybe_auto_commit_offsets_sync() is None

    # The task must be disabled again after the sync attempt.
    if has_auto_commit:
        assert coordinator._auto_commit_task is not None
        assert coordinator._auto_commit_task._enabled is False

    assert commit_sync.call_count == (1 if commit_offsets else 0)
    assert warn_log.call_count == (1 if warn else 0)
    assert exc_log.call_count == (1 if exc else 0)
def __init__(self, **configs):
    """Initialize the admin client, rejecting unrecognized config keys."""
    log.debug("Starting KafkaAdminClient with configuration: %s", configs)
    extra_configs = set(configs).difference(self.DEFAULT_CONFIG)
    if extra_configs:
        raise KafkaConfigurationError(
            "Unrecognized configs: {}".format(extra_configs))

    self.config = copy.copy(self.DEFAULT_CONFIG)
    self.config.update(configs)

    # Configure metrics
    metrics_tags = {'client-id': self.config['client_id']}
    metric_config = MetricConfig(
        samples=self.config['metrics_num_samples'],
        time_window_ms=self.config['metrics_sample_window_ms'],
        tags=metrics_tags)
    self._metrics = Metrics(
        metric_config,
        [reporter() for reporter in self.config['metric_reporters']])

    self._client = KafkaClient(metrics=self._metrics,
                               metric_group_prefix='admin',
                               **self.config)

    # Get auto-discovered version from client if necessary
    if self.config['api_version'] is None:
        self.config['api_version'] = self._client.config['api_version']

    self._closed = False
    self._refresh_controller_id()
    log.debug("KafkaAdminClient started.")
def __init__(self, **configs):
    """Merge configs onto DEFAULT_CONFIG and start the underlying client.

    Args:
        **configs: overrides for DEFAULT_CONFIG; must include (or have
            defaults for) 'group_id' and 'topic'.
    """
    self.config = copy.copy(self.DEFAULT_CONFIG)
    self.config.update(configs)
    self._client = KafkaClient(**self.config)
    self._coordinator_id = None
    # BUG FIX: read from the merged config instead of the raw kwargs, so any
    # 'group_id'/'topic' defaults declared in DEFAULT_CONFIG actually apply
    # (the raw access raised KeyError whenever the caller omitted them).
    self.group_id = self.config['group_id']
    self.topic = self.config['topic']
def setup(topic_name):
    """Create `topic_name` on the broker unless it already exists."""
    # First, refresh cluster metadata to learn the current topic list.
    kafka_client = KafkaClient(bootstrap_servers=KAFKA_SERVER,
                               api_version=(2, 5, 0))
    kafka_client.poll(future=kafka_client.cluster.request_update())
    current_topics = kafka_client.cluster.topics()
    kafka_client.close()

    print('Active topics:', current_topics)

    # Guard clause: nothing to do when the topic is already present.
    if topic_name in current_topics:
        print(f'Topic {topic_name} exists')
        return

    print(f'Creating topic {topic_name}...')
    kafka_admin_client = KafkaAdminClient(bootstrap_servers=KAFKA_SERVER,
                                          api_version=(2, 5, 0))
    kafka_admin_client.create_topics(
        new_topics=[NewTopic(name=topic_name,
                             num_partitions=1,
                             replication_factor=1)],
        validate_only=False)
    kafka_admin_client.close()
def delete_topic():
    """ Delete the specified topic """
    if ARG.server:
        ARG.server += ':9092'
    else:
        ARG.server = ','.join(SERVER['Kafka']['broker_list'])
    client = KafkaClient(bootstrap_servers=ARG.server)
    try:
        topic_req = admin.DeleteTopicsRequest_v1(topics=[ARG.topic],
                                                 timeout=1000)
        future = client.send(client.least_loaded_node(), topic_req)
        client.poll(timeout_ms=100, future=future)
        result = future.value
        LOGGER.debug(result)
        # topic_error_codes is a list of (topic, error_code) pairs; 0 == OK.
        error_code = result.topic_error_codes[0][1]
        if error_code:
            LOGGER.critical('Could not delete topic %s, error code=%d',
                            ARG.topic, error_code)
            sys.exit(error_code)
        else:
            print("Deleted topic %s" % (ARG.topic))
    except KafkaError:
        LOGGER.critical("Could not delete topic %s", ARG.topic)
    finally:
        # FIX: close the client so broker connections are released on both
        # the success and failure paths (previously leaked).
        client.close()
def test_conn_state_change(mocker, conn):
    """Connection state changes drive selector (de)registration."""
    client = KafkaClient()
    sel = mocker.patch.object(client, '_selector')
    node_id = 0

    # CONNECTING: tracked in _connecting and registered for writability.
    conn.state = ConnectionStates.CONNECTING
    client._conn_state_change(node_id, conn)
    assert node_id in client._connecting
    sel.register.assert_called_with(conn._sock, selectors.EVENT_WRITE)

    # CONNECTED: untracked and re-registered for reads with the conn attached.
    conn.state = ConnectionStates.CONNECTED
    client._conn_state_change(node_id, conn)
    assert node_id not in client._connecting
    sel.unregister.assert_called_with(conn._sock)
    sel.register.assert_called_with(conn._sock, selectors.EVENT_READ, conn)

    # Failure to connect should trigger metadata update
    assert client.cluster._need_update is False
    conn.state = ConnectionStates.DISCONNECTING
    client._conn_state_change(node_id, conn)
    assert node_id not in client._connecting
    assert client.cluster._need_update is True
    sel.unregister.assert_called_with(conn._sock)

    # A connect that dies mid-flight is also dropped from _connecting.
    conn.state = ConnectionStates.CONNECTING
    client._conn_state_change(node_id, conn)
    assert node_id in client._connecting
    conn.state = ConnectionStates.DISCONNECTING
    client._conn_state_change(node_id, conn)
    assert node_id not in client._connecting
def test_maybe_auto_commit_offsets_sync(mocker, api_version, group_id, enable,
                                        error, has_auto_commit, commit_offsets,
                                        warn, exc):
    """Auto-commit deadline and logging follow the coordinator config."""
    warn_log = mocker.patch('kafka.coordinator.consumer.log.warning')
    exc_log = mocker.patch('kafka.coordinator.consumer.log.exception')
    client = KafkaClient(api_version=api_version)
    coordinator = ConsumerCoordinator(client, SubscriptionState(), Metrics(),
                                      api_version=api_version,
                                      session_timeout_ms=30000,
                                      max_poll_interval_ms=30000,
                                      enable_auto_commit=enable,
                                      group_id=group_id)
    commit_sync = mocker.patch.object(coordinator, 'commit_offsets_sync',
                                      side_effect=error)

    # The deadline only exists when auto-commit is possible.
    if has_auto_commit:
        assert coordinator.next_auto_commit_deadline is not None
    else:
        assert coordinator.next_auto_commit_deadline is None

    assert coordinator._maybe_auto_commit_offsets_sync() is None

    # The deadline survives the sync attempt.
    if has_auto_commit:
        assert coordinator.next_auto_commit_deadline is not None

    assert commit_sync.call_count == (1 if commit_offsets else 0)
    assert warn_log.call_count == (1 if warn else 0)
    assert exc_log.call_count == (1 if exc else 0)
def cli(mocker, conn):
    """Fixture: a bootstrapped KafkaClient with DNS stubbed to localhost."""
    mocker.patch('kafka.cluster.dns_lookup',
                 return_value=[(socket.AF_INET, None, None, None,
                                ('localhost', 9092))])
    kafka_client = KafkaClient(api_version=(0, 9))
    kafka_client.poll(future=kafka_client.cluster.request_update())
    return kafka_client
def test_autocommit_enable_api_version(conn, api_version):
    """The auto-commit task only exists for brokers >= 0.8.1."""
    coordinator = ConsumerCoordinator(
        KafkaClient(), SubscriptionState(), api_version=api_version)
    task_expected = api_version >= (0, 8, 1)
    if task_expected:
        assert coordinator._auto_commit_task is not None
    else:
        assert coordinator._auto_commit_task is None
def test_init(conn):
    """Constructing a coordinator requests metadata and registers a listener."""
    client = KafkaClient()
    coordinator = ConsumerCoordinator(client, SubscriptionState())

    # metadata update on init
    assert client.cluster._need_update is True
    listener = WeakMethod(coordinator._handle_metadata_update)
    assert listener in client.cluster._listeners
def get_clients(self, cnt=1, client_id=None):
    """Return a tuple of `cnt` fresh KafkaClients with randomized ids."""
    base_id = 'client' if client_id is None else client_id
    return tuple(
        KafkaClient(client_id='%s_%s' % (base_id, random_string(4)),
                    bootstrap_servers=self.bootstrap_server())
        for _ in range(cnt))
def test_bootstrap_failure(conn):
    """A dead bootstrap connection is counted and yields no brokers."""
    conn.state = ConnectionStates.DISCONNECTED
    client = KafkaClient()
    conn.assert_called_once_with('localhost', 9092, **client.config)
    conn.connect.assert_called_with()
    conn.close.assert_called_with()
    assert client._bootstrap_fails == 1
    assert client.cluster.brokers() == set()
def test_bootstrap_success(conn):
    """A live bootstrap connection fetches and caches broker metadata."""
    conn.state = ConnectionStates.CONNECTED
    client = KafkaClient()
    conn.assert_called_once_with('localhost', 9092, **client.config)
    conn.connect.assert_called_with()
    conn.send.assert_called_once_with(MetadataRequest([]))
    assert client._bootstrap_fails == 0
    expected_brokers = {BrokerMetadata(0, 'foo', 12),
                        BrokerMetadata(1, 'bar', 34)}
    assert client.cluster.brokers() == expected_brokers
def __init__(self, **configs):
    """Wrap a KafkaClient plus ZooKeeper retry/backoff bookkeeping."""
    # ZooKeeper connection details are attached later by the caller;
    # both retry counters and sleep intervals default to 5.
    self.zk_client = None
    self.zk_configuration = None
    self.zookeeper_sleep_time = self.zookeeper_max_retries = 5
    self.kafka_sleep_time = self.kafka_max_retries = 5
    self.client = KafkaClient(**configs)
    self.refresh()
def test_bootstrap_failure(conn):
    """Failed bootstrap records the failure and caches no brokers."""
    conn.state = ConnectionStates.DISCONNECTED
    client = KafkaClient()

    # The conn was built from the client config (minus the state callback).
    args, kwargs = conn.call_args
    assert args == ('localhost', 9092, socket.AF_UNSPEC)
    kwargs.pop('state_change_callback')
    assert kwargs == client.config

    conn.connect.assert_called_with()
    conn.close.assert_called_with()
    assert client._bootstrap_fails == 1
    assert client.cluster.brokers() == set()
def __init__(self, **configs):
    """Merge configs onto defaults and start the underlying KafkaClient."""
    # Only check for extra config keys in top-level class
    unknown = set(configs) - set(self.DEFAULT_CONFIG)
    if unknown:
        raise KafkaConfigurationError("Unrecognized configs: %s" % unknown)
    self.config = copy.copy(self.DEFAULT_CONFIG)
    self.config.update(configs)
    self._client = KafkaClient(**self.config)
def __init__(self, *topics, **configs):
    """Initialize the consumer: config merge, metrics, client, fetcher,
    coordinator, and optional initial topic subscription.

    Args:
        *topics: optional topic names to subscribe to immediately.
        **configs: overrides for DEFAULT_CONFIG; unknown keys are rejected.
    """
    # Pop recognized keys out of `configs` so leftovers can be rejected.
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs.pop(key)

    # Only check for extra config keys in top-level class
    assert not configs, 'Unrecognized configs: %s' % configs

    # Map deprecated auto_offset_reset values onto their replacements.
    deprecated = {'smallest': 'earliest', 'largest': 'latest'}
    if self.config['auto_offset_reset'] in deprecated:
        new_config = deprecated[self.config['auto_offset_reset']]
        log.warning('use auto_offset_reset=%s (%s is deprecated)',
                    new_config, self.config['auto_offset_reset'])
        self.config['auto_offset_reset'] = new_config

    # Metrics registry shared by the fetcher and coordinator below.
    metrics_tags = {'client-id': self.config['client_id']}
    metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
                                 time_window_ms=self.config['metrics_sample_window_ms'],
                                 tags=metrics_tags)
    reporters = [reporter() for reporter in self.config['metric_reporters']]
    self._metrics = Metrics(metric_config, reporters)
    # TODO _metrics likely needs to be passed to KafkaClient, etc.

    # api_version was previously a str. Accept old format for now
    if isinstance(self.config['api_version'], str):
        str_version = self.config['api_version']
        if str_version == 'auto':
            # 'auto' means probe the broker: leave None for the client.
            self.config['api_version'] = None
        else:
            self.config['api_version'] = tuple(map(int, str_version.split('.')))
        log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
                    str(self.config['api_version']), str_version)

    self._client = KafkaClient(metrics=self._metrics, **self.config)

    # Get auto-discovered version from client if necessary
    if self.config['api_version'] is None:
        self.config['api_version'] = self._client.config['api_version']

    # Subscription state, fetcher, and group coordinator all share the
    # same client and metrics registry.
    self._subscription = SubscriptionState(self.config['auto_offset_reset'])
    self._fetcher = Fetcher(
        self._client, self._subscription, self._metrics, **self.config)
    self._coordinator = ConsumerCoordinator(
        self._client, self._subscription, self._metrics,
        assignors=self.config['partition_assignment_strategy'],
        **self.config)
    self._closed = False
    self._iterator = None
    self._consumer_timeout = float('inf')

    # Subscribe up-front when topics were given positionally.
    if topics:
        self._subscription.subscribe(topics=topics)
        self._client.set_topics(topics)
def client(mocker):
    """Fixture: KafkaClient with _poll stubbed and metadata ttl forced to 0."""
    _poll = mocker.patch.object(KafkaClient, '_poll')
    kafka_client = KafkaClient(request_timeout_ms=9999999,
                               reconnect_backoff_ms=2222,
                               connections_max_idle_ms=float('inf'),
                               api_version=(0, 9))
    mocker.patch.object(kafka_client.cluster, 'ttl', return_value=0)
    return kafka_client
def test_autocommit_enable_api_version(conn, api_version):
    """enable_auto_commit is forced off for brokers without OffsetCommit."""
    coordinator = ConsumerCoordinator(KafkaClient(), SubscriptionState(),
                                      enable_auto_commit=True,
                                      group_id='foobar',
                                      api_version=api_version)
    supported = api_version >= (0, 8, 1)
    if supported:
        assert coordinator._auto_commit_task is not None
        assert coordinator.config['enable_auto_commit'] is True
    else:
        assert coordinator._auto_commit_task is None
        assert coordinator.config['enable_auto_commit'] is False
def client(mocker):
    """Fixture: KafkaClient with bootstrap/_poll stubbed, tasks far off,
    and metadata ttl forced to 0."""
    mocker.patch.object(KafkaClient, '_bootstrap')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    kafka_client = KafkaClient(request_timeout_ms=9999999,
                               retry_backoff_ms=2222,
                               api_version=(0, 9))
    mocker.patch.object(kafka_client._delayed_tasks, 'next_at',
                        return_value=9999999)
    mocker.patch.object(kafka_client.cluster, 'ttl', return_value=0)
    return kafka_client
def test_bootstrap_failure(conn):
    """connect_blocking() failure counts as a bootstrap failure."""
    conn.connect_blocking.return_value = False
    client = KafkaClient(api_version=(0, 9))

    # The conn was built from the client config, minus callback and node id.
    args, kwargs = conn.call_args
    assert args == ('localhost', 9092, socket.AF_UNSPEC)
    kwargs.pop('state_change_callback')
    kwargs.pop('node_id')
    assert kwargs == client.config

    conn.connect_blocking.assert_called_with()
    conn.close.assert_called_with()
    assert client._bootstrap_fails == 1
    assert client.cluster.brokers() == set()