def __init__(self, *topics, **configs):
    """Initialise the consumer from ``DEFAULT_CONFIG`` plus keyword overrides.

    Copies ``DEFAULT_CONFIG``, applies the recognised overrides, validates
    the timeout settings and then builds the metrics registry, the client,
    the subscription state, the fetcher and the coordinator.

    Args:
        *topics: Topics to subscribe to immediately. When empty, no
            subscription is made here.
        **configs: Overrides for keys that exist in ``DEFAULT_CONFIG``.

    Raises:
        AssertionError: If ``configs`` contains a key that is not present
            in ``DEFAULT_CONFIG``.
        KafkaConfigurationError: If ``request_timeout_ms`` is not strictly
            larger than ``session_timeout_ms`` or ``fetch_max_wait_ms``.
    """
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs.pop(key)

    # Only check for extra config keys in top-level class.
    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; exception type and message are unchanged.
    if configs:
        raise AssertionError('Unrecognized configs: %s' % (configs,))

    # Map the deprecated reset policy names onto their replacements.
    deprecated = {'smallest': 'earliest', 'largest': 'latest'}
    if self.config['auto_offset_reset'] in deprecated:
        new_config = deprecated[self.config['auto_offset_reset']]
        log.warning('use auto_offset_reset=%s (%s is deprecated)',
                    new_config, self.config['auto_offset_reset'])
        self.config['auto_offset_reset'] = new_config

    request_timeout_ms = self.config['request_timeout_ms']
    session_timeout_ms = self.config['session_timeout_ms']
    fetch_max_wait_ms = self.config['fetch_max_wait_ms']
    if request_timeout_ms <= session_timeout_ms:
        raise KafkaConfigurationError(
            "Request timeout (%s) must be larger than session timeout (%s)" %
            (request_timeout_ms, session_timeout_ms))
    if request_timeout_ms <= fetch_max_wait_ms:
        raise KafkaConfigurationError(
            "Request timeout (%s) must be larger than fetch-max-wait-ms (%s)" %
            (request_timeout_ms, fetch_max_wait_ms))

    metrics_tags = {'client-id': self.config['client_id']}
    metric_config = MetricConfig(
        samples=self.config['metrics_num_samples'],
        time_window_ms=self.config['metrics_sample_window_ms'],
        tags=metrics_tags)
    reporters = [reporter() for reporter in self.config['metric_reporters']]
    self._metrics = Metrics(metric_config, reporters)
    # TODO _metrics likely needs to be passed to KafkaClient, etc.

    # api_version was previously a str. Accept old format for now
    if isinstance(self.config['api_version'], str):
        str_version = self.config['api_version']
        if str_version == 'auto':
            self.config['api_version'] = None
        else:
            self.config['api_version'] = tuple(
                map(int, str_version.split('.')))
        log.warning(
            'use api_version=%s [tuple] -- "%s" as str is deprecated',
            str(self.config['api_version']), str_version)

    self._client = KafkaClient(metrics=self._metrics, **self.config)

    # Get auto-discovered version from client if necessary
    if self.config['api_version'] is None:
        self.config['api_version'] = self._client.config['api_version']

    self._subscription = SubscriptionState(
        self.config['auto_offset_reset'])
    self._fetcher = Fetcher(
        self._client, self._subscription, self._metrics, **self.config)
    self._coordinator = ConsumerCoordinator(
        self._client, self._subscription, self._metrics,
        assignors=self.config['partition_assignment_strategy'],
        **self.config)
    self._closed = False
    self._iterator = None
    self._consumer_timeout = float('inf')

    if topics:
        self._subscription.subscribe(topics=topics)
        self._client.set_topics(topics)
def __init__(self, **configs):
    """Initialise the producer from ``DEFAULT_CONFIG`` plus keyword overrides.

    Copies ``DEFAULT_CONFIG``, applies the recognised overrides, builds the
    metrics registry and the client, validates the compression settings,
    then creates the record accumulator and the sender. The sender is
    marked as a daemon and started, and a cleanup callable is registered
    with ``atexit``.

    Args:
        **configs: Overrides for keys that exist in ``DEFAULT_CONFIG``.

    Raises:
        AssertionError: If ``configs`` contains a key that is not present
            in ``DEFAULT_CONFIG``, if the configured ``api_version`` is
            lower than the selected compression type requires, or if the
            codec's availability checker returns a falsy value.
        ValueError: If ``compression_type`` is not a key of
            ``_COMPRESSORS``.
    """
    log.debug("Starting the Kafka producer")  # trace
    self.config = copy.copy(self.DEFAULT_CONFIG)
    for key in self.config:
        if key in configs:
            self.config[key] = configs.pop(key)

    # Only check for extra config keys in top-level class.
    # Validation below raises explicitly instead of using ``assert`` so it
    # is not stripped under ``python -O``; exception types and messages are
    # unchanged.
    if configs:
        raise AssertionError('Unrecognized configs: %s' % (configs,))

    if self.config['client_id'] is None:
        self.config['client_id'] = 'kafka-python-producer-%s' % \
                                   (PRODUCER_CLIENT_ID_SEQUENCE.increment(),)

    if self.config['acks'] == 'all':
        self.config['acks'] = -1

    # api_version was previously a str. accept old format for now
    if isinstance(self.config['api_version'], str):
        deprecated = self.config['api_version']
        if deprecated == 'auto':
            self.config['api_version'] = None
        else:
            self.config['api_version'] = tuple(map(int, deprecated.split('.')))
        log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
                    str(self.config['api_version']), deprecated)

    # Configure metrics
    metrics_tags = {'client-id': self.config['client_id']}
    metric_config = MetricConfig(
        samples=self.config['metrics_num_samples'],
        time_window_ms=self.config['metrics_sample_window_ms'],
        tags=metrics_tags)
    reporters = [reporter() for reporter in self.config['metric_reporters']]
    self._metrics = Metrics(metric_config, reporters)

    client = self.config['kafka_client'](
        metrics=self._metrics, metric_group_prefix='producer',
        wakeup_timeout_ms=self.config['max_block_ms'],
        **self.config)

    # Get auto-discovered version from client if necessary
    if self.config['api_version'] is None:
        self.config['api_version'] = client.config['api_version']

    if self.config['compression_type'] == 'lz4':
        if not self.config['api_version'] >= (0, 8, 2):
            raise AssertionError('LZ4 Requires >= Kafka 0.8.2 Brokers')

    if self.config['compression_type'] == 'zstd':
        if not self.config['api_version'] >= (2, 1, 0):
            raise AssertionError('Zstd Requires >= Kafka 2.1.0 Brokers')

    # Check compression_type for library support
    ct = self.config['compression_type']
    if ct not in self._COMPRESSORS:
        raise ValueError("Not supported codec: {}".format(ct))
    checker, compression_attrs = self._COMPRESSORS[ct]
    if not checker():
        raise AssertionError(
            "Libraries for {} compression codec not found".format(ct))
    self.config['compression_attrs'] = compression_attrs

    message_version = self._max_usable_produce_magic()
    self._accumulator = RecordAccumulator(
        message_version=message_version, metrics=self._metrics,
        **self.config)
    self._metadata = client.cluster
    guarantee_message_order = bool(
        self.config['max_in_flight_requests_per_connection'] == 1)
    self._sender = Sender(client, self._metadata,
                          self._accumulator, self._metrics,
                          guarantee_message_order=guarantee_message_order,
                          **self.config)
    self._sender.daemon = True
    self._sender.start()
    self._closed = False

    self._cleanup = self._cleanup_factory()
    atexit.register(self._cleanup)
    log.debug("Kafka producer started")
def metrics(request, config, reporter):
    """Create a ``Metrics`` registry that is closed when ``request`` finalizes.

    The registry is built from ``config`` with ``reporter`` as its only
    reporter and with expiration enabled. Its ``close`` call is registered
    through ``request.addfinalizer`` before the registry is returned.
    """
    registry = Metrics(config, [reporter], enable_expiration=True)

    def _shutdown():
        registry.close()

    request.addfinalizer(_shutdown)
    return registry
def coordinator(client):
    """Return a ``ConsumerCoordinator`` for ``client``.

    The coordinator is given a freshly constructed ``SubscriptionState``
    and a freshly constructed ``Metrics`` instance, both built with their
    default arguments.
    """
    subscription = SubscriptionState()
    registry = Metrics()
    return ConsumerCoordinator(client, subscription, registry)
def metrics(request, config, reporter):
    """Yield a ``Metrics`` registry and close it once the generator resumes.

    The registry is built from ``config`` with ``reporter`` as its only
    reporter and with expiration enabled. ``request`` is accepted but not
    used by this body.
    """
    registry = Metrics(config, [reporter], enable_expiration=True)
    yield registry
    # Runs when the consumer advances the generator past the yield.
    registry.close()