def run(self):
    try:
        consumer = KafkaConsumer(self.response_topic,
                                 bootstrap_servers=self.servers,
                                 client_id=random_string())
        self.logger.info("manifest consumer on {0} kafka topic".format(
            self.response_topic))
        for message in consumer:
            try:
                json_str = message.value
                json_msg = json.loads(json_str)
                if is_close_msg(json_msg):
                    print(json_str)
                    break
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
                self.logger.error(str(exc_type))
                self.logger.error(str(exc_value))
                break
        consumer.close()
        admin = KafkaAdminClient(bootstrap_servers=self.servers)
        admin.delete_topics([self.response_topic])
        admin.close()
    except Exception as e:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        self.logger.error(str(exc_type))
        self.logger.error(str(exc_value))
def del_topic(args):
    admin = KafkaAdminClient(bootstrap_servers=[args.broker])
    try:
        admin.delete_topics(topics=args.topics)
    except UnknownTopicOrPartitionError:
        pass
    admin.close()
def create_topic(self, topic_name: str, num_partitions: int = 1,
                 replication_factor: int = 1):
    """
    Creates a new topic if not already present

    :param topic_name: New topic name
    :param num_partitions: Number of partitions. Default value is 1.
    :param replication_factor: Replication factor. Default value is 1.
    """
    logger.debug(f"Creating a topic called {topic_name}")
    admin = KafkaAdminClient(**self.settings)
    existing_topics = admin.list_topics()
    if topic_name in existing_topics:
        admin.close()
        return
    topics = [
        NewTopic(topic_name,
                 num_partitions=num_partitions,
                 replication_factor=replication_factor)
    ]
    admin.create_topics(topics)
    admin.close()
    logger.debug(f"Topic {topic_name} created")
class TestElasticProducer(object):
    def setup_class(self):
        self.admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
        self.index = ElasticIndex('test-elastic-producer', 'doc')
        self.index.index_into({'test': 1}, 0)
        self.index.index_into({'test': 2}, 1)
        self.index.index_into({'test': 3}, 2)
        self.index.index_into({'test': 4}, 3)
        self.index.index_into({'test': 5}, 4)
        self.producer = ElasticProducer(
            'configs/elastic/test_elastic_producer_producer.yml')
        self.consumer = SimpleConsumer(
            'configs/elastic/test_elastic_producer_consumer.yml')

    def teardown_class(self):
        self.consumer.close()
        self.admin.delete_topics(['test-elastic-producer'])
        self.admin.close()
        self.index.delete()

    # @pytest.mark.skip()
    def test_produce(self):
        self.producer.process()
        key, message = self.consumer.consume()
        assert key == '0'
        assert message == '{"test": 1}'
def create_topic(brokers, topic, partition_count=1, replica_count=1):
    """Create a topic if it does not exist.

    Args:
        brokers (list): The 'host[:port]' list that the client should
            contact to bootstrap initial cluster metadata.
        topic (str): Topic where the message will be published.
        partition_count (int): Number of partitions (default 1).
        replica_count (int): Replication factor (default 1).

    Returns:
        set: The partition ids of the topic.
    """
    consumer = KafkaConsumer(bootstrap_servers=brokers)
    topics = consumer.topics()
    if topic in topics:
        # Topic already exists; report its actual partitions.
        partitions = consumer.partitions_for_topic(topic)
        consumer.close()
    else:
        consumer.close()
        admin = KafkaAdminClient(bootstrap_servers=brokers)
        admin.create_topics([
            NewTopic(
                name=topic,
                num_partitions=partition_count,
                replication_factor=replica_count,
            ),
        ])
        admin.close()
        partitions = set(range(partition_count))
    return partitions
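# Hedged usage sketch for create_topic above. The broker address and topic
# name are illustrative assumptions (not part of the original code), and a
# reachable Kafka cluster is assumed.
if __name__ == '__main__':
    partitions = create_topic(['localhost:9092'], 'example-topic',
                              partition_count=3)
    print("Partitions for example-topic:", sorted(partitions))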
def run(self):
    try:
        self.logger.info("starting rosbag_consumer:{0}".format(
            self.response_topic))
        rospy.init_node("mozart_rosbag_{0}".format(random_string(6)))
        consumer = KafkaConsumer(self.response_topic,
                                 bootstrap_servers=self.servers,
                                 client_id=random_string())
        if self.s3:
            self.s3_reader = S3Reader(self.s3_read_req, self.s3_read_resp)
            self.s3_deleter = S3Deleter(self.s3_delete_req)
            self.s3_reader.start()
            self.s3_deleter.start()
        for msg in consumer:
            try:
                json_str = msg.value
                json_msg = json.loads(json_str)
                if is_close_msg(json_msg):
                    print(json_str)
                    break
                self.publish_bag(json_msg)
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
                print(str(e))
                break
        if self.s3:
            # Drain in-flight S3 reads, then shut down the helper processes,
            # escalating to terminate() if they do not exit in time.
            self.read_s3(drain=True)
            self.s3_read_req.put("__close__")
            self.s3_reader.join(timeout=2)
            if self.s3_reader.is_alive():
                self.s3_reader.terminate()
            self.s3_delete_req.put("__close__")
            time.sleep(5)
            self.s3_deleter.join(timeout=2)
            if self.s3_deleter.is_alive():
                self.s3_deleter.terminate()
        else:
            for dir_path in self.clean_up:
                shutil.rmtree(dir_path, ignore_errors=True)
        consumer.close()
        admin = KafkaAdminClient(bootstrap_servers=self.servers)
        admin.delete_topics([self.response_topic])
        admin.close()
    except Exception as e:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(str(e))
def main(event, context):
    print("Looking up broker ARN")
    svc_client = boto3.client('servicediscovery')
    response = svc_client.discover_instances(NamespaceName=NAMESPACE,
                                             ServiceName=SERVICE)
    broker_arn = response['Instances'][0]['Attributes']['broker_arn']
    print("Got broker ARN {0}".format(broker_arn))

    print("Looking up broker string")
    msk_client = boto3.client('kafka')
    response = msk_client.get_bootstrap_brokers(ClusterArn=broker_arn)
    broker_string = response['BootstrapBrokerStringTls']
    print("Got broker string {0}".format(broker_string))

    # make sure topic exists
    print("Checking if topic {0} exists".format(TOPIC_NAME))
    kclient = KafkaConsumer(bootstrap_servers=broker_string,
                            security_protocol='SSL')
    existing_topics = kclient.topics()
    if TOPIC_NAME in existing_topics:
        print("Topic {0} exists".format(TOPIC_NAME))
    else:
        print("Topic {0} does not exist, creating".format(TOPIC_NAME))
        topic_list = [
            NewTopic(name=TOPIC_NAME, num_partitions=1, replication_factor=1)
        ]
        kadmin = KafkaAdminClient(bootstrap_servers=broker_string,
                                  security_protocol='SSL')
        kadmin.create_topics(new_topics=topic_list)
        kadmin.close()
    kclient.close()

    producer = KafkaProducer(bootstrap_servers=broker_string,
                             security_protocol='SSL')
    while True:
        remaining_time_millis = context.get_remaining_time_in_millis()
        if remaining_time_millis < MIN_TIME_REMAINING_MILLIS:
            print("Time left ({0}) is less than time required ({1}), exiting".
                  format(str(remaining_time_millis),
                         str(MIN_TIME_REMAINING_MILLIS)))
            break
        else:
            print("Time left ({0}) is greater than time required ({1}), "
                  "sending".format(str(remaining_time_millis),
                                   str(MIN_TIME_REMAINING_MILLIS)))
            msg = "Kafka message sent at {0}".format(str(time.time()))
            producer.send(TOPIC_NAME, msg.encode('utf-8'))
            producer.flush()
            time.sleep(SLEEP_SECONDS)
    producer.close()
    print("All done")
def add_topic(args):
    admin = KafkaAdminClient(bootstrap_servers=[args.broker])
    admin.create_topics([
        NewTopic(
            name=args.topic,
            num_partitions=args.partitions,
            replication_factor=args.replication_factor,
        ),
    ])
    admin.close()
class AdminClient(KafkaPython):
    def __init__(self, bootstrap_servers=None, **kwargs):
        super().__init__(servers=bootstrap_servers)
        admin_client_config.update(kwargs)
        self.engine = KafkaAdminClient(
            bootstrap_servers=self._bootstrap_servers,
            client_id=self._client_id,
            request_timeout_ms=self._request_timeout_ms,
            **admin_client_config)

    def create_topics(self, topic_list: list):
        new_topics = [
            NewTopic(name=item['name'],
                     num_partitions=item['num_partitions'],
                     replication_factor=item['replication_factor'],
                     replica_assignments=item['replica_assignments'],
                     topic_configs=item['topic_configs'])
            for item in topic_list
        ]
        # Only create the topics if none of them already exists.
        if not Consumer().get_user_topics().intersection(
                {item['name'] for item in topic_list}):
            try:
                self.engine.create_topics(new_topics, **create_topic_config)
            except KafkaError as e:
                _logger.error(e)
            self.engine.close()
        else:
            _logger.error(
                self._logMsg(create_topic_fail_code, self._client_id,
                             'duplicate topic'))
        return

    def delete_topics(self, topic: list):
        # Only attempt the delete when at least one requested topic exists.
        if Consumer().get_user_topics().intersection(set(topic)):
            try:
                self.engine.delete_topics(topic, self._request_timeout_ms)
            except KafkaError as e:
                _logger.error(
                    self._logMsg(delete_topic_fail_code, self._client_id,
                                 'failed to delete topic: %s' % e))
            self.engine.close()
        else:
            _logger.error(
                self._logMsg(delete_topic_fail_code, self._client_id,
                             'topic to delete does not exist'))
        return
def kafka_topic_factory():
    import logging
    from contextlib import contextmanager, suppress

    from smart_getenv import getenv

    from kafka import KafkaAdminClient
    from kafka.admin import NewTopic
    from kafka.errors import UnknownTopicOrPartitionError

    from kirby.api.ext.topic import topic_retry_decorator

    logger = logging.getLogger(__name__)

    bootstrap_servers = getenv(
        "KAFKA_BOOTSTRAP_SERVERS", type=list, separator=","
    )
    if bootstrap_servers:
        args = {"bootstrap_servers": bootstrap_servers}
        if getenv("KAFKA_USE_TLS", type=bool):
            args.update(
                {
                    "security_protocol": "SSL",
                    "ssl_cafile": getenv("KAFKA_SSL_CAFILE"),
                    "ssl_certfile": getenv("KAFKA_SSL_CERTFILE"),
                    "ssl_keyfile": getenv("KAFKA_SSL_KEYFILE"),
                }
            )
        admin = KafkaAdminClient(**args)

        @topic_retry_decorator
        @contextmanager
        def create_kafka_topic(topic_name, timeout_ms=1500):
            with suppress(UnknownTopicOrPartitionError):
                admin.delete_topics([topic_name], timeout_ms=10000)
            admin.create_topics(
                [NewTopic(topic_name, 1, 1)], timeout_ms=timeout_ms
            )
            yield
            admin.delete_topics([topic_name], timeout_ms=10000)

        yield create_kafka_topic
        admin.close()
    else:
        logger.warning(
            "There is no KAFKA_BOOTSTRAP_SERVERS. "
            "Creation of kafka_topic skipped."
        )
        yield
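# Hedged usage sketch for the kafka_topic_factory fixture above: pytest
# injects the yielded factory, and the inner context manager creates the
# topic on entry and deletes it on exit. The test name and topic name are
# illustrative assumptions.
def test_with_temporary_topic(kafka_topic_factory):
    with kafka_topic_factory("kirby-test-topic"):
        pass  # produce to / consume from the short-lived topic here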
class Admin:
    def __init__(self, bootstrap_servers):
        self.admin = KafkaAdminClient(bootstrap_servers=bootstrap_servers)

    def create_topic(self, topic):
        try:
            new_topic = NewTopic(name=topic, num_partitions=1,
                                 replication_factor=1)
            self.admin.create_topics([new_topic], timeout_ms=2000)
        except Exception:
            pass

    def delete_topic(self, topic):
        self.admin.delete_topics([topic], timeout_ms=2000)

    def close(self):
        self.admin.close()
def _createTopicIfNeeded(kafka_connect):
    admin_client = KafkaAdminClient(
        bootstrap_servers=kafka_connect['uri'],
        security_protocol="SSL",
        ssl_cafile=os.path.join(os.getcwd(), kafka_connect['cafile']),
        ssl_certfile=os.path.join(os.getcwd(), kafka_connect['certfile']),
        ssl_keyfile=os.path.join(os.getcwd(), kafka_connect['keyfile']))
    try:
        admin_client.create_topics(new_topics=[
            NewTopic(name=kafka_connect['topic'],
                     num_partitions=1,
                     replication_factor=1)
        ], validate_only=False)
    except TopicAlreadyExistsError:
        pass
    finally:
        # Close the client even when the topic already exists; the original
        # only closed it on the success path.
        admin_client.close()
def collect_topic_information(bootstrap_servers, old_consumer_group):
    """Gets a list of current topics being subscribed to by this consumer
    group that we may need to remove with the migration.

    Using the `list_consumer_group_offsets()` function since
    `describe_consumer_groups()` doesn't return proper data.

    :param bootstrap_servers: The Kafka brokers in the cluster to connect to.
    :param old_consumer_group: The consumer group we are migrating from.
    """
    admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    results = admin_client.list_consumer_group_offsets(old_consumer_group)
    topics = []
    # The result keys are TopicPartition tuples; collect the distinct topics.
    for tp in results:
        if tp.topic not in topics:
            topics.append(tp.topic)
    admin_client.close()
    return topics
def collect_old_consumer_group_offsets(bootstrap_servers, old_consumer_group,
                                       removed_topics):
    """
    Connects to the brokers specified to gather current offset information
    of the consumer group we're migrating from.

    :param bootstrap_servers: The Kafka brokers in the cluster to connect to.
    :param old_consumer_group: The consumer group we are migrating from.
    :param removed_topics: Topics to exclude from the offsets file.
    """
    admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    results = admin_client.list_consumer_group_offsets(old_consumer_group)
    delimiter = ','
    with open(OUTPUT_FILE, 'w') as f:
        for tp, offsets in results.items():
            # Skip topics that are being dropped as part of the migration.
            if removed_topics and tp.topic in removed_topics:
                continue
            f.write(str(tp.topic) + delimiter)
            f.write(str(tp.partition) + delimiter)
            f.write(str(offsets.offset) + '\n')
    admin_client.close()
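# A minimal sketch chaining the two migration helpers above. The broker
# address, group name, and the 'deprecated.' prefix filter are assumptions
# for illustration only.
if __name__ == '__main__':
    brokers = ['localhost:9092']
    topics = collect_topic_information(brokers, 'old-consumer-group')
    removed = [t for t in topics if t.startswith('deprecated.')]
    collect_old_consumer_group_offsets(brokers, 'old-consumer-group', removed)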
from kafka import KafkaAdminClient
import os

admin = KafkaAdminClient(bootstrap_servers='localhost:9092')

os.system('pkill -9 -f app.py')
os.system('pkill -9 -f consumer.py')
os.system('pkill -9 -f producer.py')

admin.delete_topics(['dashboard1', 'clickhouse1'])
admin.close()
# os.system('sudo docker stop $(sudo docker ps -a -q)')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from kafka import KafkaAdminClient

servers = ['192.168.5.110:9092']

adminClient = KafkaAdminClient(bootstrap_servers=servers)
adminClient.delete_topics(['test'])
print(adminClient.list_consumer_groups())
adminClient.close()
def create_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    client.create_topics([NewTopic(topic, 1, 1)])
    client.close()
class TestLineProducer(object):
    def setup_class(self):
        self.admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
        self.producer = LineProducer('configs/lines/producer.yml')
        self.producer_gz = LineProducer('configs/lines/producer_gz.yml')
        self.producer_bz2 = LineProducer('configs/lines/producer_bz2.yml')
        self.consumer = SimpleConsumer('configs/lines/consumer.yml')
        self.consumer_gz = SimpleConsumer('configs/lines/consumer_gz.yml')
        self.consumer_bz2 = SimpleConsumer('configs/lines/consumer_bz2.yml')
        self.ntriples_producer = SortedNTriplesCollectorProducer(
            'configs/nt/producer.yml')
        self.ntriples_consumer = BulkElasticConsumer('configs/nt/consumer.yml')

    def teardown_class(self):
        self.consumer.close()
        self.consumer_gz.close()
        self.consumer_bz2.close()
        for topic in ['test-lines-gz', 'test-lines', 'test-lines-bz2',
                      'test-sorted-nt-resource']:
            try:
                self.admin.delete_topics([topic])
            except UnknownTopicOrPartitionError:
                pass
        self.admin.close()

    # The (key, message) pairs every line producer is expected to emit.
    EXPECTED_LINES = [
        ('0', "This is a line"),
        ('1', "and another line"),
        ('2', "a third line"),
        ('3', "a forth line"),
        ('4', "a lot of lines now"),
    ]

    @staticmethod
    def _consume_next(consumer):
        """Poll the consumer until it returns a (key, message) pair."""
        key = None
        message = None
        while key is None and message is None:
            key, message = consumer.consume()
        return key, message

    def test_ntriples_producer(self):
        self.ntriples_producer.process()
        assert self.ntriples_consumer.consume()

    # @pytest.mark.skip()
    def test_produce(self):
        self.producer.process()
        for expected_key, expected_message in self.EXPECTED_LINES:
            key, message = self._consume_next(self.consumer)
            assert key == expected_key
            assert message == expected_message

    # @pytest.mark.skip("Currently way too slow")
    def test_produce_gz(self):
        self.producer_gz.process()
        for expected_key, expected_message in self.EXPECTED_LINES:
            key, message = self._consume_next(self.consumer_gz)
            assert key == expected_key
            assert message == expected_message

    # @pytest.mark.skip("Currently way too slow")
    def test_produce_bz2(self):
        self.producer_bz2.process()
        for expected_key, expected_message in self.EXPECTED_LINES:
            key, message = self._consume_next(self.consumer_bz2)
            assert key == expected_key
            assert message == expected_message
def client():
    c = KafkaAdminClient(bootstrap_servers='localhost')
    yield c
    c.close()
def delete_kafka_topic(config: KafkaInputConfig, topic):
    client = KafkaAdminClient(bootstrap_servers=config.brokers)
    client.delete_topics([topic])
    client.close()
class ClientAdmin:
    """Wrapper around the kafka-python KafkaAdminClient."""

    Num_Partitions = 3
    Replication_Factor = 3

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.admin_client = KafkaAdminClient(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"])
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.admin_client.close()

    @staticmethod
    def new_topic(topic_name: str):
        """
        Generate a new topic object.
        :return:
        """
        return NewTopic(name=topic_name,
                        num_partitions=ClientAdmin.Num_Partitions,
                        replication_factor=ClientAdmin.Replication_Factor,
                        replica_assignments=None,
                        topic_configs=None)

    def create_topic(self, topic_name: str):
        """
        Create a new topic on the cluster (using the default topic configs).
        :param topic_name:
        :return:
        """
        topic_list = [self.new_topic(topic_name)]
        try:
            response = self.admin_client.create_topics(
                topic_list, timeout_ms=TIME_OUT_ADMIN)
        except TopicAlreadyExistsError:
            log.tag_error(
                KafkaInfo.KafkaAdmin,
                "Topic [%s] already exists! Create failed!" % topic_name)
            raise ActionError(KafkaErr.TopicExist)
        return response

    def delete_topic(self, topic_name: str):
        """
        Delete a topic from the cluster.
        :param topic_name:
        :return:
        """
        topic_list = [topic_name]
        try:
            self.admin_client.delete_topics(topic_list,
                                            timeout_ms=TIME_OUT_ADMIN)
        except UnknownTopicOrPartitionError:
            log.tag_error(
                KafkaInfo.KafkaAdmin,
                "Topic [%s] does not exist! Nothing to delete" % topic_name)
            raise ActionError(KafkaErr.TopicNotExist)

    def create_partition(self):
        """
        Create additional partitions for an existing topic.
        :return:
        """

    def list_consumer_groups(self):
        """
        List the consumer groups in the cluster.
        :return:
        """
        return self.admin_client.list_consumer_groups()
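# Usage sketch for ClientAdmin above: because it implements
# __enter__/__exit__, it is meant to be used as a context manager so the
# underlying admin client is always closed. The topic name is an
# illustrative assumption.
if __name__ == '__main__':
    with ClientAdmin() as admin:
        admin.create_topic("example-topic")
        print(admin.list_consumer_groups())
        admin.delete_topic("example-topic")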
def latest_topic(self):
    admin = KafkaAdminClient(bootstrap_servers=self.kafka_brokers)
    # Assumes topic names sort chronologically (e.g. timestamped names),
    # so the lexicographically last topic is the most recent.
    topic = sorted(admin.list_topics())[-1]
    print(topic)
    admin.close()
    return topic
def kafka_admin():
    _client = KafkaAdminClient()
    yield _client
    _client.close()
import argparse

from kafka import KafkaAdminClient
from kafka.admin import NewTopic

parser = argparse.ArgumentParser(description="Create a kafka topic")
parser.add_argument('--host', help="The host to talk to",
                    default='localhost:9092', type=str)
parser.add_argument('--client_id', help="The client id to use",
                    default='admin', type=str)
parser.add_argument('topic_name', help="The name of the topic", type=str)
parser.add_argument('num_partitions', help="The number of partitions",
                    type=int)
parser.add_argument('--replicas', help="The number of replicas", default=1,
                    type=int)
args = parser.parse_args()

admin_client = KafkaAdminClient(bootstrap_servers=args.host,
                                client_id=args.client_id)
# num_partitions is already parsed as int by argparse, so no cast is needed.
admin_client.create_topics([
    NewTopic(args.topic_name,
             num_partitions=args.num_partitions,
             replication_factor=args.replicas)
])
admin_client.close()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("zookeeper_server",
                        help="Kafka zookeeper server (<server:port>)")
    parser.add_argument("bootstrap_server",
                        help="Kafka bootstrap server (<server:port>)")
    parser.add_argument("-i", "--iterations", type=int, default=20,
                        help="Maximum number of partitions to move.")
    parser.add_argument(
        "-p", "--partition-percentage", type=float, default=90,
        help="Don't move partitions whose sizes are within this percent of "
        "each other, to avoid swapping similar-sized shards.")
    parser.add_argument(
        "-P", "--disk-percentage", type=float, default=10,
        help="Don't exchange between nodes whose sizes are within this many "
        "percentage points of each other.")
    parser.add_argument("-d", "--dry-run", action="store_true",
                        help="Don't perform moves, just plan")
    parser.add_argument("-v", "--verbose", action="count",
                        help="Verbose logging")
    parser.add_argument(
        "--net-throttle", type=int, default=40000000,
        help="Limit transfer between brokers by this amount, in bytes/sec")
    parser.add_argument(
        "--disk-throttle", type=int, default=200000000,
        help="Limit transfer between disks on the same brokers by this "
        "amount, in bytes/sec")
    parser.add_argument(
        "--no-wait", action="store_true",
        help="Don't wait for rebalancing to finish. Default is to watch for "
        "results from remote host")
    args = parser.parse_args()

    # basicConfig() is a no-op on a second call, so pick the level once.
    if args.verbose and args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger("kafka_rebalance").setLevel(logging.DEBUG)
        logging.getLogger("rebalance_core").setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    if os.path.exists("{}/reassign.json".format(SCRIPTDIR)):
        LOG.info("Reassignment JSON exists. Is a reassignment already running?")
        exit(0)

    settings = PlanSettings(
        max_iters=args.iterations,
        node_percentage_threshold=args.disk_percentage,
        item_percentage_threshold=args.partition_percentage,
        swap=False,
    )

    kafka_admin = KafkaAdminClient(bootstrap_servers=args.bootstrap_server)
    try:
        partitions, brokers = fetch(kafka_admin,
                                    disk_glob="/kafka/*",
                                    ssh_args={"user": "******"})
    finally:
        kafka_admin.close()

    disks = [disk for broker in brokers for disk in broker.disks]
    LOG.info("Begin planning")
    moving_partitions = plan(disks, settings)
    for replica in moving_partitions:
        LOG.info("Moving {}-{} from {} to {}".format(replica.topic,
                                                     replica.id,
                                                     replica.initial_owner,
                                                     replica.planned_owner))
    json_data = gen_reassignment_file(partitions, moving_partitions)
    LOG.info("JSON reassignment data: {}".format(pformat(json_data)))

    if args.dry_run:
        LOG.info("Dry run complete, run without -d/--dry-run to execute")
        exit(0)

    with open("{}/reassign.json".format(SCRIPTDIR), "w") as f_out:
        json.dump(json_data, f_out)

    work_broker = random.choice(brokers)
    if not exec_reassign(json_data, work_broker, args.zookeeper_server,
                         args.net_throttle, args.disk_throttle,
                         not args.no_wait):
        exit(1)

    try:
        os.unlink("{}/reassign.json".format(SCRIPTDIR))
    except Exception:
        pass