import json
import time

import kafka.errors
from kafka import KafkaAdminClient


def test_kafka_flush_on_big_message(kafka_cluster):
    # Create batches of messages of size ~100Kb
    kafka_messages = 10000
    batch_messages = 1000
    messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages
                for i in range(kafka_messages)]
    # `instance` and `kafka_produce` come from the surrounding test harness.
    kafka_produce('flush', messages)

    instance.query('''
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;

        CREATE TABLE test.kafka (key UInt64, value String)
            ENGINE = Kafka
            SETTINGS kafka_broker_list = 'kafka1:19092',
                     kafka_topic_list = 'flush',
                     kafka_group_name = 'flush',
                     kafka_format = 'JSONEachRow',
                     kafka_max_block_size = 10;
        CREATE TABLE test.view (key UInt64, value String)
            ENGINE = MergeTree
            ORDER BY key;
        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka;
    ''')

    # Wait until the consumer group has committed offsets for every produced message.
    client = KafkaAdminClient(bootstrap_servers="localhost:9092")
    received = False
    while not received:
        try:
            offsets = client.list_consumer_group_offsets('flush')
            for topic, offset in offsets.items():
                if topic.topic == 'flush' and offset.offset == kafka_messages:
                    received = True
                    break
        except kafka.errors.GroupCoordinatorNotAvailableError:
            continue

    # Give the materialized view up to 20 seconds to flush the last block.
    for _ in range(20):
        time.sleep(1)
        result = instance.query('SELECT count() FROM test.view')
        if int(result) == kafka_messages * batch_messages:
            break

    assert int(result) == kafka_messages * batch_messages, \
        'ClickHouse lost some messages: {}'.format(result)
from kafka import KafkaAdminClient
from kafka.admin import NewTopic

admin_client = KafkaAdminClient(
    bootstrap_servers=['kafka1:9094', 'kafka2:9095', 'kafka3:9096'],
    client_id='test')

topic_list = []
cfg1 = {
    "cleanup.policy": "compact",
    # The amount of time to retain delete tombstone markers for log-compacted topics.
    # This setting also gives a bound on the time in which a consumer must complete a
    # read if it begins from offset 0, to ensure that it gets a valid snapshot of the
    # final stage (otherwise delete tombstones may be collected before it completes
    # its scan).
    "delete.retention.ms": "100",
    # The period of time after which Kafka will force the log to roll even if the
    # segment file isn't full, to ensure that retention can delete or compact old data.
    "segment.ms": "100",
    # The minimum time a message will remain uncompacted in the log.
    # Only applicable for logs that are being compacted.
    "min.compaction.lag.ms": "0",
    "max.compaction.lag.ms": "1",
    # The segment file size for the log. Retention and cleaning are always done one
    # file at a time, so a larger segment size means fewer files but less granular
    # control over retention.
    "segment.bytes": "14",
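    # cfg1 is truncated above; it is closed here so the sketch below can apply it.
}

# The snippet never reaches the step that applies cfg1. A minimal sketch of that
# step, assuming a hypothetical topic name 'compacted-demo': kafka-python's NewTopic
# takes per-topic settings via its topic_configs argument.
topic_list.append(NewTopic(name='compacted-demo',  # name is an assumption
                           num_partitions=1,
                           replication_factor=3,
                           topic_configs=cfg1))
admin_client.create_topics(new_topics=topic_list)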
def delete_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    client.delete_topics([topic])
    client.close()
class ClientAdmin:
    """Wrapper around the kafka-python KafkaAdminClient."""

    Num_Partitions = 3
    Replication_Factor = 3

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.admin_client = KafkaAdminClient(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"])
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.admin_client.close()

    @staticmethod
    def new_topic(topic_name: str):
        """
        Generate a new topic object.
        :return:
        """
        return NewTopic(name=topic_name,
                        num_partitions=ClientAdmin.Num_Partitions,
                        replication_factor=ClientAdmin.Replication_Factor,
                        replica_assignments=None,
                        topic_configs=None)

    def create_topic(self, topic_name: str):
        """
        Create a new topic in the cluster (with the default topic configuration).
        :param topic_name:
        :return:
        """
        topic_list = [self.new_topic(topic_name)]
        try:
            response = self.admin_client.create_topics(topic_list,
                                                       timeout_ms=TIME_OUT_ADMIN)
        except TopicAlreadyExistsError:
            log.tag_error(KafkaInfo.KafkaAdmin,
                          "Topic [%s] already exist! Create Failed !" % topic_name)
            raise ActionError(KafkaErr.TopicExist)
        return response

    def delete_topic(self, topic_name: str):
        """
        Delete a topic from the cluster.
        :param topic_name:
        :return:
        """
        topic_list = [topic_name]
        try:
            self.admin_client.delete_topics(topic_list, timeout_ms=TIME_OUT_ADMIN)
        except UnknownTopicOrPartitionError:
            log.tag_error(KafkaInfo.KafkaAdmin,
                          "Topic [%s] not exist! Don't need delete" % topic_name)
            raise ActionError(KafkaErr.TopicNotExist)

    def create_partition(self):
        """
        Create additional partitions for an existing topic.
        :return:
        """

    def list_consumer_groups(self):
        """
        List the consumer groups in the cluster.
        :return:
        """
        return self.admin_client.list_consumer_groups()
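# create_partition() above is an empty stub. A minimal sketch of one way to fill it,
# assuming kafka-python's KafkaAdminClient.create_partitions(), which takes a mapping
# of topic name -> NewPartitions; total_count is the desired total partition count,
# not the number of partitions to add. (Sketch only, not part of the original class.)
from kafka.admin import NewPartitions

def create_partition(self, topic_name: str, total_count: int):
    try:
        self.admin_client.create_partitions(
            {topic_name: NewPartitions(total_count=total_count)},
            timeout_ms=TIME_OUT_ADMIN)
    except UnknownTopicOrPartitionError:
        log.tag_error(KafkaInfo.KafkaAdmin,
                      "Topic [%s] not exist! Create partition failed" % topic_name)
        raise ActionError(KafkaErr.TopicNotExist)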
from kafka import KafkaAdminClient, KafkaProducer
from kafka.admin import NewTopic

# Create the 'demo-single-partition' Kafka topic, ignoring the error if it
# already exists.
try:
    admin = KafkaAdminClient(
        bootstrap_servers='kafka-1:19093,kafka-2:29093,kafka-3:39093')
    topic = NewTopic(name='demo-single-partition',
                     num_partitions=1,
                     replication_factor=1)
    admin.create_topics([topic])
except Exception:
    pass

producer = KafkaProducer(
    bootstrap_servers='kafka-1:19093,kafka-2:29093,kafka-3:39093')

for i in range(10):
    print(i)
    producer.send("demo-single-partition", b'msg %d' % i)

producer.close()
def create_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    # NewTopic(name, num_partitions, replication_factor)
    client.create_topics([NewTopic(topic, 1, 1)])
    client.close()
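# A hedged variant of create_kafka_topic that tolerates a pre-existing topic:
# create_topics() raises kafka.errors.TopicAlreadyExistsError in that case.
# (ensure_kafka_topic is a hypothetical name, not part of the original snippet.)
from kafka.errors import TopicAlreadyExistsError

def ensure_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    try:
        client.create_topics([NewTopic(topic, 1, 1)])
    except TopicAlreadyExistsError:
        pass  # the topic is already there; nothing to do
    finally:
        client.close()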
def create_topic(broker, topic, partitions, client):
    admin = KafkaAdminClient(bootstrap_servers=broker, client_id=client)
    topic = NewTopic(topic, num_partitions=partitions, replication_factor=2)
    admin.create_topics([topic])
def setup_class(self):
    self.admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
    self.consumer = SimpleElasticConsumer(
        'configs/elastic/elastic-consumer-simple-test.yml')
    self.bulk_consumer = BulkElasticConsumer(
        'configs/elastic/elastic-consumer-bulk-test.yml')
    self.producer = LineProducer("configs/elastic/json-lines-producer.yml")
    self.producer.process()
def configure(
    self,
    topics=None,
    key_type=None,
    value_type=None,
    broker=None,
    consumer=None,
    producer=None,
    admin=None,
    client=None,
    concurrency=1,
):
    self.broker = broker or "localhost:9092"
    self.topics = topics or [f"agent-topic-{self.name}"]
    self.key_type = key_type
    self.value_type = value_type
    self.concurrency = concurrency
    self.consumer_config = consumer or {}
    self.producer_config = producer or {}
    self.admin_config = admin or {}
    self.client_config = client or {}

    self.consumer = ConsumerGroupeComponent(
        name=self.name,
        topics=self.topics,
        key_type=self.key_type,
        value_type=self.value_type,
        processors=[Processor(_coro=self.coro, concurrency=self.concurrency)],
        config={
            **DEFAULT_CONFIG,
            **CONSUMER_DEFAULT_CONFIG,
            **self.consumer_config,
        },
    )
    self.producer = ProducerComponent(
        key_type=self.key_type,
        value_type=self.value_type,
        **{
            **DEFAULT_CONFIG,
            **PRODUCER_DEFAULT_CONFIG,
            **self.producer_config,
            "bootstrap_servers": self.broker,
            "client_id": f"{self.name}:producer:{id(self)}",
        },
    )
    self.admin_client = KafkaAdminClient(
        **{
            **DEFAULT_CONFIG,
            **ADMIN_DEFAULT_CONFIG,
            **self.admin_config,
            "client_id": f"{self.name}:admin:{id(self)}",
        }
    )
    self.kafka_client = KafkaClient(DEFAULT_CONFIG["bootstrap_servers"])

    # Make sure every topic exists with at least `concurrency` partitions.
    for topic in self.topics:
        count = self.get_topic_partition_count(topic)
        if count == 0:
            self.create_topic(
                topic, {**DEFAULT_TOPIC_CONFIG, "partitions": self.concurrency}
            )
        if self.concurrency > count:
            self.update_partitions([topic], self.concurrency)
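# get_topic_partition_count() and update_partitions() are referenced above but not
# shown. Minimal sketches under the assumption that self.admin_client is the
# kafka-python KafkaAdminClient built in configure(); describe_topics() returns
# per-topic metadata dicts with 'error_code', 'topic' and 'partitions' keys.
# (Hypothetical implementations, not the original helpers.)
from kafka.admin import NewPartitions

def get_topic_partition_count(self, topic):
    # error_code 0 means the topic exists; otherwise treat it as absent.
    for meta in self.admin_client.describe_topics([topic]):
        if meta.get("error_code", 0) == 0 and meta["topic"] == topic:
            return len(meta["partitions"])
    return 0

def update_partitions(self, topics, total_count):
    # Grow each topic to total_count partitions (create_partitions takes the
    # desired total, not a delta).
    self.admin_client.create_partitions(
        {t: NewPartitions(total_count=total_count) for t in topics})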
import flask_migrate
import flask_sqlalchemy
import flask_bcrypt
from dotenv import load_dotenv
from kafka import KafkaAdminClient, KafkaProducer

from config import _DOT_ENV_PATH, KAFKA_URL, CLIENT_ID

load_dotenv(_DOT_ENV_PATH)

db = flask_sqlalchemy.SQLAlchemy()
migrate = flask_migrate.Migrate(db=db)
bcrypt = flask_bcrypt.Bcrypt()
admin_client = KafkaAdminClient(bootstrap_servers=KAFKA_URL,
                                client_id=CLIENT_ID)
kafka_producer = KafkaProducer(bootstrap_servers=KAFKA_URL,
                               client_id=CLIENT_ID)


def init_app(app, **kwargs):
    db.app = app
    db.init_app(app)
    migrate.init_app(app)

    from .base import TimestampMixin
    from .data import Data
    from .url import Url
from kafka import KafkaProducer, KafkaConsumer
from kafka import KafkaAdminClient
from kafka.admin import NewTopic

# Config
msk_endpoint_tls = 'b-2.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094,b-1.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094,b-3.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094'
msk_endpoint_plaintext = 'b-2.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092,b-1.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092,b-3.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092'
msk_endpoint = msk_endpoint_plaintext
TOPIC_NAME = 'ac_topic'

producer = KafkaProducer(bootstrap_servers=msk_endpoint)
consumer = KafkaConsumer(TOPIC_NAME, bootstrap_servers=msk_endpoint)
admin = KafkaAdminClient(bootstrap_servers=msk_endpoint)


# Functions
def send_data():
    for _ in range(100):
        producer.send(TOPIC_NAME, value=b'some_message_bytes')


def consume_data():
    # Poll one record at a time and collect every poll result.
    messages = []
    for _ in range(100):
        messages.append(consumer.poll(max_records=1))
    return messages


def create_topic(topic_name):
    topic_list = []
    topic_list.append(
        # The original snippet is truncated here; the partition/replication
        # counts below are assumptions.
        NewTopic(name=topic_name, num_partitions=1, replication_factor=2))
    admin.create_topics(new_topics=topic_list)
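# The clients above use the plaintext listener (port 9092). A minimal sketch of
# switching to the TLS listener (msk_endpoint_tls, port 9094), which requires
# security_protocol='SSL' on each client; tls_producer is a hypothetical name.
tls_producer = KafkaProducer(bootstrap_servers=msk_endpoint_tls,
                             security_protocol='SSL')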
def delete_kafka(topic_name):
    adminClient = KafkaAdminClient(bootstrap_servers=[KAFKA_HOST])
    adminClient.delete_topics(topics=[topic_name])
def create_topic(topic_name):
    adminClient = KafkaAdminClient(bootstrap_servers=[KAFKA_HOST])
    print(topic_name)
    # create_topics() expects a list of NewTopic objects, not a bare topic name;
    # the partition/replication counts here are assumptions.
    adminClient.create_topics(
        [NewTopic(name=topic_name, num_partitions=1, replication_factor=1)])