Example #1
def test_kafka_flush_on_big_message(kafka_cluster):
    # Create batches of messages, each batch ~100 KB
    kafka_messages = 10000
    batch_messages = 1000
    messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)]
    kafka_produce('flush', messages)

    instance.query('''
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;
        CREATE TABLE test.kafka (key UInt64, value String)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'flush',
                kafka_group_name = 'flush',
                kafka_format = 'JSONEachRow',
                kafka_max_block_size = 10;
        CREATE TABLE test.view (key UInt64, value String)
            ENGINE = MergeTree
            ORDER BY key;
        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka;
    ''')

    client = KafkaAdminClient(bootstrap_servers="localhost:9092")
    received = False
    while not received:
        try:
            offsets = client.list_consumer_group_offsets('flush')
            for topic, offset in offsets.items():
                if topic.topic == 'flush' and offset.offset == kafka_messages:
                    received = True
                    break
        except kafka.errors.GroupCoordinatorNotAvailableError:
            continue

    for _ in range(20):
        time.sleep(1)
        result = instance.query('SELECT count() FROM test.view')
        if int(result) == kafka_messages*batch_messages:
            break

    assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result)
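The kafka_produce() helper used above is not part of this snippet; a minimal sketch of such a helper, assuming a plain KafkaProducer pointed at the same broker the test cluster exposes, could look like this:

from kafka import KafkaProducer

def kafka_produce(topic, messages):
    # Hypothetical helper: send each pre-serialized message to the topic and
    # block until the broker has acknowledged all of them.
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    for message in messages:
        producer.send(topic, message.encode('utf-8'))
    producer.flush()
    producer.close()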
Example #2
from kafka import KafkaAdminClient
from kafka.admin import NewTopic

admin_client = KafkaAdminClient(
    bootstrap_servers=['kafka1:9094', 'kafka2:9095', 'kafka3:9096'],
    client_id='test')

topic_list = []

cfg1 = {
    "cleanup.policy": "compact",

    # The amount of time to retain delete tombstone markers for log compacted topics.
    # This setting also gives a bound on the time in which a consumer must complete a read
    # if they begin from offset 0 to ensure that they get a valid snapshot of the final stage
    # (otherwise delete tombstones may be collected before they complete their scan).
    "delete.retention.ms": "100",

    # This configuration controls the period of time after which Kafka will force the log to roll even
    # if the segment file isn't full to ensure that retention can delete or compact old data.
    "segment.ms": "100",

    # The minimum time a message will remain uncompacted in the log.
    # Only applicable for logs that are being compacted.
    "min.compaction.lag.ms": "0",
    "max.compaction.lag.ms": "1",

    # This configuration controls the segment file size for the log.
    # Retention and cleaning is always done a file at a time so a larger segment size means fewer
    # files but less granular control over retention.
    "segment.bytes": "14",
Example #3
def delete_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    client.delete_topics([topic])
    client.close()
class ClientAdmin:
    """
    Wrapper around the kafka-python KafkaAdminClient
    """
    Num_Partitions = 3
    Replication_Factor = 3

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.admin_client = KafkaAdminClient(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"])
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.admin_client.close()

    @staticmethod
    def new_topic(topic_name: str):
        """
        Generate a new topic object
        :return:
        """
        return NewTopic(name=topic_name,
                        num_partitions=ClientAdmin.Num_Partitions,
                        replication_factor=ClientAdmin.Replication_Factor,
                        replica_assignments=None,
                        topic_configs=None)

    def create_topic(self, topic_name: str):
        """
        Create a new topic in the cluster (using the default topic configuration)
        :param topic_name:
        :return:
        """
        topic_list = [self.new_topic(topic_name)]
        try:
            response = self.admin_client.create_topics(
                topic_list, timeout_ms=TIME_OUT_ADMIN)
        except TopicAlreadyExistsError:
            log.tag_error(
                KafkaInfo.KafkaAdmin,
                "Topic [%s] already exist! Create Failed !" % topic_name)
            raise ActionError(KafkaErr.TopicExist)
        return response

    def delete_topic(self, topic_name: str):
        """
        Delete a topic from the cluster
        :param topic_name:
        :return:
        """
        topic_list = [topic_name]
        try:
            self.admin_client.delete_topics(topic_list,
                                            timeout_ms=TIME_OUT_ADMIN)
        except UnknownTopicOrPartitionError:
            log.tag_error(
                KafkaInfo.KafkaAdmin,
                "Topic [%s] not exist! Don't need delete" % topic_name)
            raise ActionError(KafkaErr.TopicNotExist)

    def create_partition(self):
        """
        Create additional partitions for an existing topic
        :return:
        """

    def list_consumer_groups(self):
        """
        List the consumer groups in the cluster
        :return:
        """
        return self.admin_client.list_consumer_groups()
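A brief, hypothetical usage sketch for the context-manager interface above (the topic name is illustrative and Config() must supply valid broker settings):

# Illustrative usage only; relies on Config() providing reachable brokers.
with ClientAdmin() as admin:
    admin.create_topic('example-topic')
    print(admin.list_consumer_groups())
    admin.delete_topic('example-topic')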
from kafka import KafkaAdminClient, KafkaProducer
from kafka.admin import NewTopic

# Create the 'demo-single-partition' Kafka topic
try:
    admin = KafkaAdminClient(
        bootstrap_servers='kafka-1:19093,kafka-2:29093,kafka-3:39093')

    topic = NewTopic(name='demo-single-partition',
                     num_partitions=1,
                     replication_factor=1)
    admin.create_topics([topic])
except Exception:
    # Ignore failures here (e.g. the topic may already exist).
    pass

producer = KafkaProducer(
    bootstrap_servers='kafka-1:19093,kafka-2:29093,kafka-3:39093')

for i in range(10):
    print(i)
    producer.send("demo-single-partition", b'msg %d' % i)

producer.close()
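A matching consumer sketch for reading the ten messages back; the group id and timeout are assumptions, not part of the original:

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'demo-single-partition',
    bootstrap_servers='kafka-1:19093,kafka-2:29093,kafka-3:39093',
    auto_offset_reset='earliest',
    group_id='demo-reader',        # assumed group id
    consumer_timeout_ms=10000)     # stop iterating after 10 s of inactivity

for record in consumer:
    print(record.offset, record.value)

consumer.close()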
Example #6
def create_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    client.create_topics([NewTopic(topic, 1, 1)])
    client.close()
def create_topic(broker, topic, partitions, client):
    admin = KafkaAdminClient(bootstrap_servers=broker, client_id=client)

    topic = NewTopic(topic, num_partitions=partitions, replication_factor=2)

    admin.create_topics([topic])
Example #8
    def setup_class(self):
        self.admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
        self.consumer = SimpleElasticConsumer('configs/elastic/elastic-consumer-simple-test.yml')
        self.bulk_consumer = BulkElasticConsumer('configs/elastic/elastic-consumer-bulk-test.yml')
        self.producer = LineProducer("configs/elastic/json-lines-producer.yml")
        self.producer.process()
Example #9
    def configure(
        self,
        topics=None,
        key_type=None,
        value_type=None,
        broker=None,
        consumer=None,
        producer=None,
        admin=None,
        client=None,
        concurrency=1,
    ):
        self.broker = broker or "localhost:9092"
        self.topics = topics or [f"agent-topic-{self.name}"]
        self.key_type = key_type
        self.value_type = value_type
        self.concurrency = concurrency
        self.consumer_config = consumer or {}
        self.producer_config = producer or {}
        self.admin_config = admin or {}
        self.client_config = client or {}
        self.consumer = ConsumerGroupeComponent(
            name=self.name,
            topics=self.topics,
            key_type=self.key_type,
            value_type=self.value_type,
            processors=[Processor(_coro=self.coro, concurrency=self.concurrency)],
            config={
                **DEFAULT_CONFIG,
                **CONSUMER_DEFAULT_CONFIG,
                **self.consumer_config,
            },
        )
        self.producer = ProducerComponent(
            key_type=self.key_type,
            value_type=self.value_type,
            **{
                **DEFAULT_CONFIG,
                **PRODUCER_DEFAULT_CONFIG,
                **self.producer_config,
                "bootstrap_servers": self.broker,
                "client_id": f"{self.name}:producer:{id(self)}",
            },
        )

        self.admin_client = KafkaAdminClient(
            **{
                **DEFAULT_CONFIG,
                **ADMIN_DEFAULT_CONFIG,
                **self.admin_config,
                "client_id": f"{self.name}:admin:{id(self)}",
            }
        )

        self.kafka_client = KafkaClient(DEFAULT_CONFIG["bootstrap_servers"])

        for topic in self.topics:
            count = self.get_topic_partition_count(topic)
            if count == 0:
                self.create_topic(
                    topic, {**DEFAULT_TOPIC_CONFIG, "partitions": self.concurrency}
                )
            if self.concurrency > count:
                self.update_partitions([topic], self.concurrency)
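get_topic_partition_count(), create_topic() and update_partitions() are referenced above but not shown in this excerpt; minimal sketches of what they might look like on top of kafka-python's admin client (exact shapes are assumptions, not the original code):

    def get_topic_partition_count(self, topic):
        # Hypothetical helper: returns 0 if the topic does not exist yet.
        for entry in self.admin_client.describe_topics([topic]):
            if entry['topic'] == topic and not entry['error_code']:
                return len(entry['partitions'])
        return 0

    def create_topic(self, topic, config):
        # config is assumed to carry a "partitions" key (see DEFAULT_TOPIC_CONFIG).
        new_topic = NewTopic(name=topic,
                             num_partitions=config['partitions'],
                             replication_factor=config.get('replication_factor', 1))
        self.admin_client.create_topics([new_topic])

    def update_partitions(self, topics, partition_count):
        # Grow each topic to partition_count partitions; NewPartitions comes
        # from kafka.admin.
        self.admin_client.create_partitions(
            {topic: NewPartitions(total_count=partition_count) for topic in topics})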
Example #10
import flask_migrate
import flask_sqlalchemy
import flask_bcrypt

from dotenv import load_dotenv
from kafka import KafkaAdminClient, KafkaProducer

from config import _DOT_ENV_PATH, KAFKA_URL, CLIENT_ID

load_dotenv(_DOT_ENV_PATH)

db = flask_sqlalchemy.SQLAlchemy()
migrate = flask_migrate.Migrate(db=db)
bcrypt = flask_bcrypt.Bcrypt()
admin_client = KafkaAdminClient(
    bootstrap_servers=KAFKA_URL,
    client_id=CLIENT_ID)
kafka_producer = KafkaProducer(bootstrap_servers=KAFKA_URL,
                               client_id=CLIENT_ID)


def init_app(app, **kwargs):
    db.app = app
    db.init_app(app)
    migrate.init_app(app)


from .base import TimestampMixin
from .data import Data
from .url import Url
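A short, hypothetical sketch of how the module-level kafka_producer could be used from a Flask view; the blueprint, route and topic name are illustrative only:

import json
from flask import Blueprint, request

bp = Blueprint('events', __name__)

@bp.route('/events', methods=['POST'])
def publish_event():
    # Publish the posted JSON document to an illustrative 'events' topic.
    payload = json.dumps(request.get_json()).encode('utf-8')
    kafka_producer.send('events', payload)
    kafka_producer.flush()
    return {'status': 'queued'}, 202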
Example #11
from kafka import KafkaProducer, KafkaConsumer
from kafka import KafkaAdminClient
from kafka.admin import NewTopic

# Config
msk_endpoint_tls = 'b-2.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094,b-1.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094,b-3.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9094'
msk_endpoint_plaintext = 'b-2.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092,b-1.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092,b-3.alec-mykafka-poc.mktj7c.c4.kafka.ap-southeast-2.amazonaws.com:9092'

msk_endpoint = msk_endpoint_plaintext

TOPIC_NAME = 'ac_topic'
producer = KafkaProducer(bootstrap_servers=msk_endpoint)
consumer = KafkaConsumer(TOPIC_NAME, bootstrap_servers=msk_endpoint)
admin = KafkaAdminClient(bootstrap_servers=msk_endpoint)


# Functions
def send_data():
    for _ in range(100):
        producer.send(TOPIC_NAME, value=b'some_message_bytes')


def consume_data():
    for _ in range(100):
        messages = consumer.poll(max_records=1)
        return messages


def create_topic(topic_name):
    topic_list = []
    # Partition and replication counts here are assumed defaults.
    topic_list.append(
        NewTopic(name=topic_name, num_partitions=1, replication_factor=1))
    admin.create_topics(new_topics=topic_list)
Example #12
def delete_kafka(topic_name):
    adminClient = KafkaAdminClient(bootstrap_servers=[KAFKA_HOST])
    adminClient.delete_topics(topics=[topic_name])
Example #13
def create_topic(topic_name):
    adminClient = KafkaAdminClient(bootstrap_servers=[KAFKA_HOST])
    print(topic_name)
    # create_topics() expects NewTopic objects, not a bare topic name string;
    # partition and replication counts here are assumed.
    adminClient.create_topics(
        [NewTopic(name=topic_name, num_partitions=1, replication_factor=1)])