def get_topic_config(self, topic):
    """Get configuration information for specified topic.

    :param topic: topic name to look up under /config/topics.
    :rtype : dict of configuration
    :raises NoNodeError: if the config node is missing and the topic does
        not exist either.
    """
    try:
        config_data = load_json(
            self.get(
                "/config/topics/{topic}".format(topic=topic)
            )[0]
        )
    except NoNodeError:
        # Kafka version before 0.8.1 does not have "/config/topics/<topic_name>" path in ZK and
        # if the topic exists, return default dict instead of raising an Exception.
        # Ref: https://cwiki.apache.org/confluence/display/KAFKA/Kafka+data+structures+in+Zookeeper.
        topics = self.get_topics(topic_name=topic, fetch_partition_state=False)
        if topics:
            _log.info("Configuration not available for topic {topic}.".format(topic=topic))
            config_data = {"config": {}}
        else:
            _log.error(
                "topic {topic} not found.".format(topic=topic)
            )
            # Bare raise preserves the original exception and traceback.
            raise
    return config_data
def _get_entity_config(self, entity_type, entity_name, entity_exists):
    """Get configuration information for specified entity.

    :entity_type : "brokers" or "topics"
    :entity_name : broker id or topic name
    :entity_exists : fn(entity_name) -> bool to determine whether an entity
    exists. used to determine whether to throw an exception when a
    configuration cannot be found for the given entity_name
    :rtype : dict of configuration
    :raises NoNodeError: if no config node exists and
        entity_exists(entity_name) is False.
    """
    # NOTE: assert is stripped under `python -O`; acceptable here since this
    # guards an internal programming error, not external input.
    assert entity_type in (
        "brokers", "topics"), "Supported entities are brokers and topics"
    try:
        config_data = load_json(
            self.get("/config/{entity_type}/{entity_name}".format(
                entity_type=entity_type, entity_name=entity_name))[0])
    except NoNodeError:
        if entity_exists(entity_name):
            # Entity exists but has no config overrides (e.g. Kafka < 0.8.1
            # has no /config nodes at all) -> return an empty config dict.
            _log.info(
                "Configuration not available for {entity_type} {entity_name}."
                .format(
                    entity_type=entity_type,
                    entity_name=entity_name,
                ))
            config_data = {"config": {}}
        else:
            _log.error("{entity_type} {entity_name} not found".format(
                entity_type=entity_type, entity_name=entity_name))
            # Bare raise preserves the original exception and traceback.
            raise
    return config_data
def get_topic_config(self, topic):
    """Get configuration information for specified topic.

    :rtype : dict of configuration
    :raises NoNodeError: if the config node is missing and the topic does
        not exist either.
    """
    try:
        config_data = load_json(
            self.get("/config/topics/{topic}".format(topic=topic))[0])
    except NoNodeError:
        # Kafka version before 0.8.1 does not have "/config/topics/<topic_name>" path in ZK and
        # if the topic exists, return default dict instead of raising an Exception.
        # Ref: https://cwiki.apache.org/confluence/display/KAFKA/Kafka+data+structures+in+Zookeeper.
        topics = self.get_topics(topic_name=topic, fetch_partition_state=False)
        if topics:
            _log.info(
                "Configuration not available for topic {topic}.".format(
                    topic=topic))
            config_data = {"config": {}}
        else:
            _log.error("topic {topic} not found.".format(topic=topic))
            # Bare raise preserves the original exception and traceback.
            raise
    return config_data
def get_broker_metadata(self, broker_id):
    """Return the parsed metadata stored at the broker's ZK node.

    Logs and re-raises NoNodeError when the broker id is not registered.
    """
    node_path = "/brokers/ids/{b_id}".format(b_id=broker_id)
    try:
        raw_metadata, _ = self.get(node_path)
    except NoNodeError:
        _log.error("broker '{b_id}' not found.".format(b_id=broker_id))
        raise
    return load_json(raw_metadata)
def get_pending_plan(self):
    """Read the currently running plan on reassign_partitions node."""
    node_path = '{admin}/{reassignment_node}'.format(
        admin=ADMIN_PATH,
        reassignment_node=REASSIGNMENT_NODE,
    )
    try:
        plan_data, _ = self.get(node_path)
    except NoNodeError:
        # Missing node means no reassignment is currently in flight.
        return {}
    return load_json(plan_data)
def _fetch_partition_state(self, topic_id, partition_id):
    """Fetch partition-state for given topic-partition."""
    path = "/brokers/topics/{topic_id}/partitions/{p_id}/state".format(
        topic_id=topic_id,
        p_id=partition_id,
    )
    try:
        state_json, _ = self.get(path)
    except NoNodeError:
        return {}  # The partition has no data
    return load_json(state_json)
def get_broker_metadata(self, broker_id):
    """Fetch and deserialize the ZK metadata node for *broker_id*.

    Logs and re-raises NoNodeError when the broker is unknown.
    """
    try:
        data, _ = self.get("/brokers/ids/{b_id}".format(b_id=broker_id))
    except NoNodeError:
        _log.error("broker '{b_id}' not found.".format(b_id=broker_id))
        raise
    return load_json(data)
def get_topic_config(self, topic):
    """Get configuration information for specified topic.

    :rtype : dict of configuration
    :raises NoNodeError: if no config node exists for the topic.
    """
    try:
        config_data = load_json(
            self.get("/config/topics/{topic}".format(topic=topic))[0])
    except NoNodeError:
        _log.error("topic {topic} not found.".format(topic=topic))
        # Bare raise preserves the original exception and traceback.
        raise
    return config_data
def get_broker_metadata(self, broker_id):
    """Return broker metadata from ZK, deriving 'host' from the first entry
    of 'endpoints' when the host field is null.

    :raises NoNodeError: if the broker id is not registered.
    """
    try:
        broker_json = load_json(
            self.get("/brokers/ids/{b_id}".format(b_id=broker_id))[0])
        if (broker_json['host'] is None):
            # NOTE(review): `[SSL|INTERNAL|PLAINTEXTSASL]` is a character
            # CLASS (matches one of those characters), not an alternation of
            # scheme names; the pattern only works because the `.*://` that
            # follows consumes the remainder of any scheme prefix. Do not
            # "fix" it to an alternation without re-testing PLAINTEXT
            # endpoints. Also, `.?` before the port likely intends `:?`.
            pattern = '(?:[SSL|INTERNAL|PLAINTEXTSASL].*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*'
            # Assumes broker_json['endpoints'] is a non-empty list and the
            # pattern always matches (host part is `[^:/ ]+`) -- TODO confirm.
            result = re.search(pattern, broker_json['endpoints'][0])
            broker_json['host'] = result.group('host')
    except NoNodeError:
        _log.error("broker '{b_id}' not found.".format(b_id=broker_id), )
        raise
    return broker_json
def execute_plan(self, plan, allow_rf_change=False, allow_rf_mismatch=False):
    """Submit reassignment plan for execution.

    :param plan: dict with a 'partitions' list of partition dicts, each
        carrying at least 'topic' and 'partition' keys.
    :param allow_rf_change: forwarded to validate_plan.
    :param allow_rf_mismatch: forwarded to validate_plan.
    :returns: True if the plan node was created in Zookeeper; False if the
        plan is invalid, another plan is already in progress, or the write
        failed.
    """
    reassignment_path = '{admin}/{reassignment_node}'\
        .format(admin=ADMIN_PATH, reassignment_node=REASSIGNMENT_NODE)
    plan_json = dump_json(plan)
    # Set comprehension replaces the manual add-loop (same contents).
    topic_names_from_proposed_plan = {
        partition['topic'] for partition in plan['partitions']
    }
    # Only fetch the cluster plan for the topics the proposal touches.
    base_plan = self.get_cluster_plan(
        topic_names=list(topic_names_from_proposed_plan))
    if not validate_plan(plan, base_plan, allow_rf_change=allow_rf_change, allow_rf_mismatch=allow_rf_mismatch):
        _log.error(
            'Given plan is invalid. Aborting new reassignment plan ... {plan}'
            .format(plan=plan))
        return False
    # Send proposed-plan to zookeeper
    try:
        _log.info('Sending plan to Zookeeper...')
        self.create(reassignment_path, plan_json, makepath=True)
        _log.info(
            'Re-assign partitions node in Zookeeper updated successfully '
            'with {plan}'.format(plan=plan),
        )
        return True
    except NodeExistsError:
        # A reassignment is already running; report what it contains.
        _log.warning('Previous plan in progress. Exiting..')
        _log.warning(
            'Aborting new reassignment plan... {plan}'.format(plan=plan))
        in_progress_plan = load_json(self.get(reassignment_path)[0])
        in_progress_partitions = [
            '{topic}-{p_id}'.format(
                topic=p_data['topic'],
                p_id=str(p_data['partition']),
            )
            for p_data in in_progress_plan['partitions']
        ]
        _log.warning(
            '{count} partition(s) reassignment currently in progress:-'.
            format(count=len(in_progress_partitions)),
        )
        _log.warning(
            '{partitions}. In Progress reassignment plan...'.format(
                partitions=', '.join(in_progress_partitions),
            ),
        )
        return False
    except Exception as e:
        _log.error(
            'Could not re-assign partitions {plan}. Error: {e}'.format(
                plan=plan, e=e),
        )
        return False
def execute_plan(self, plan, allow_rf_change=False):
    """Submit reassignment plan for execution.

    Returns True once the plan node is written to Zookeeper; False when
    validation fails, a previous plan is still running, or the write raises.
    """
    node_path = '{admin}/{reassignment_node}'.format(
        admin=ADMIN_PATH,
        reassignment_node=REASSIGNMENT_NODE,
    )
    serialized_plan = dump_json(plan)
    current_plan = self.get_cluster_plan()
    if not validate_plan(plan, current_plan, allow_rf_change=allow_rf_change):
        _log.error('Given plan is invalid. Aborting new reassignment plan ... {plan}'.format(plan=plan))
        return False
    # Send proposed-plan to zookeeper
    try:
        _log.info('Sending plan to Zookeeper...')
        self.create(node_path, serialized_plan, makepath=True)
        _log.info(
            'Re-assign partitions node in Zookeeper updated successfully '
            'with {plan}'.format(plan=plan),
        )
    except NodeExistsError:
        # Another reassignment is already pending; log its partitions.
        _log.warning('Previous plan in progress. Exiting..')
        _log.warning('Aborting new reassignment plan... {plan}'.format(plan=plan))
        pending_plan = load_json(self.get(node_path)[0])
        pending_names = []
        for entry in pending_plan['partitions']:
            pending_names.append(
                '{topic}-{p_id}'.format(
                    topic=entry['topic'],
                    p_id=str(entry['partition']),
                )
            )
        _log.warning(
            '{count} partition(s) reassignment currently in progress:-'
            .format(count=len(pending_names)),
        )
        _log.warning(
            '{partitions}. In Progress reassignment plan...'.format(
                partitions=', '.join(pending_names),
            ),
        )
        return False
    except Exception as e:
        _log.error(
            'Could not re-assign partitions {plan}. Error: {e}'
            .format(plan=plan, e=e),
        )
        return False
    return True
def get_json(self, path, watch=None):
    """Reads the data of the specified node and converts it to json."""
    raw, _ = self.get(path, watch)
    if not raw:
        return None
    return load_json(raw)
def get_group_offsets(self, group, topic=None):
    """Fetch group offsets for given topic and partition otherwise all topics
    and partitions otherwise.

    {
        'topic': {
            'partition': offset-value,
            ...
            ...
        }
    }

    :raises NoNodeError: if an offset node disappears between listing the
        partitions and reading the offset value.
    """
    group_offsets = {}
    try:
        all_topics = self.get_my_subscribed_topics(group)
    except NoNodeError:
        # No offset information of given consumer-group
        _log.warning(
            "No topics subscribed to consumer-group {group}.".format(
                group=group,
            ),
        )
        return group_offsets
    if topic:
        if topic in all_topics:
            topics = [topic]
        else:
            _log.error(
                "Topic {topic} not found in topic list {topics} for consumer"
                "-group {consumer_group}.".format(
                    topic=topic,
                    # join() directly; the identity generator was redundant.
                    topics=', '.join(all_topics),
                    consumer_group=group,
                ),
            )
            return group_offsets
    else:
        topics = all_topics
    # Distinct loop variable: the original shadowed the `topic` parameter.
    for topic_name in topics:
        group_offsets[topic_name] = {}
        try:
            partitions = self.get_my_subscribed_partitions(group, topic_name)
        except NoNodeError:
            _log.warning(
                "No partition offsets found for topic {topic}. "
                "Continuing to next one...".format(topic=topic_name),
            )
            continue
        # Fetch offsets for each partition
        for partition in partitions:
            path = "/consumers/{group_id}/offsets/{topic}/{partition}".format(
                group_id=group,
                topic=topic_name,
                partition=partition,
            )
            try:
                # Get current offset
                offset_json, _ = self.get(path)
                group_offsets[topic_name][partition] = load_json(offset_json)
            except NoNodeError:
                _log.error("Path {path} not found".format(path=path))
                raise
    return group_offsets
def get_multiple_topics(
    self,
    topic_names=None,
    names_only=False,
    fetch_partition_state=True,
):
    """Get information on all the available topics.

    Topic-data format with fetch_partition_state as False :-
    topic_data = {
        'version': 1,
        'partitions': {
            <p_id>: {
                replicas: <broker-ids>
            }
        }
    }

    Topic-data format with fetch_partition_state as True:-
    topic_data = {
        'version': 1,
        'ctime': <timestamp>,
        'partitions': {
            <p_id>:{
                replicas: [<broker_id>, <broker_id>, ...],
                isr: [<broker_id>, <broker_id>, ...],
                controller_epoch: <val>,
                leader_epoch: <val>,
                version: 1,
                leader: <broker-id>,
                ctime: <timestamp>,
            }
        }
    }

    Note: By default we also fetch partition-state which results in
    accessing the zookeeper twice. If just partition-replica information is
    required fetch_partition_state should be set to False.
    """
    try:
        if not topic_names:
            topic_names = self.get_children("/brokers/topics")
    except NoNodeError:
        _log.error("Cluster is empty.")
        return {}
    if names_only:
        # Returns the (possibly caller-supplied) list of names, not a dict.
        return topic_names
    topics_data = {}
    for topic_id in topic_names:
        try:
            topic_info = self.get(
                "/brokers/topics/{id}".format(id=topic_id))
            topic_data = load_json(topic_info[0])
            # ZnodeStat.ctime is in milliseconds; convert to seconds.
            topic_ctime = topic_info[1].ctime / 1000.0
            topic_data['ctime'] = topic_ctime
        except NoNodeError:
            # NOTE(review): a single missing topic aborts the whole call and
            # returns {} (not `continue`) -- all previously gathered topics
            # are discarded. Callers appear to rely on this; confirm before
            # changing.
            _log.info(
                "topic '{topic}' not found.".format(topic=topic_id),
            )
            return {}
        # Prepare data for each partition
        partitions_data = {}
        for p_id, replicas in six.iteritems(topic_data['partitions']):
            partitions_data[p_id] = {}
            if fetch_partition_state:
                # Fetch partition-state from zookeeper
                # NOTE(review): assumes _fetch_partition_state returns a raw
                # (json_bytes, ZnodeStat) tuple; a sibling version of that
                # helper in this file returns an already-parsed dict -- TODO
                # confirm which implementation is in scope here.
                partition_state = self._fetch_partition_state(
                    topic_id, p_id)
                partitions_data[p_id] = load_json(partition_state[0])
                partitions_data[p_id][
                    'ctime'] = partition_state[1].ctime / 1000.0
            else:
                # Fetch partition-info from zookeeper
                partition_info = self._fetch_partition_info(topic_id, p_id)
                partitions_data[p_id][
                    'ctime'] = partition_info.ctime / 1000.0
            partitions_data[p_id]['replicas'] = replicas
        topic_data['partitions'] = partitions_data
        topics_data[topic_id] = topic_data
    return topics_data
def get_topics(
    self,
    topic_name=None,
    names_only=False,
    fetch_partition_state=True,
):
    """Get information on all the available topics.

    When fetch_partition_state is False each partition entry carries only
    'replicas' and 'ctime'; when True it is the full partition-state node
    (replicas, isr, leader, epochs, version, ctime). By default the
    partition state is fetched, which hits zookeeper a second time per
    partition; pass fetch_partition_state=False if replica info suffices.
    With names_only=True only the list of topic names is returned.
    """
    try:
        if topic_name:
            topic_ids = [topic_name]
        else:
            topic_ids = self.get_children(
                "/brokers/topics",
            )
    except NoNodeError:
        _log.error(
            "Cluster is empty."
        )
        return {}
    if names_only:
        return topic_ids

    topics_data = {}
    for topic_id in topic_ids:
        try:
            raw_topic, topic_stat = self.get(
                "/brokers/topics/{id}".format(id=topic_id))
        except NoNodeError:
            _log.info(
                "topic '{topic}' not found.".format(topic=topic_id),
            )
            return {}
        topic_data = load_json(raw_topic)
        topic_data['ctime'] = topic_stat.ctime / 1000.0

        # Prepare data for each partition
        partition_entries = {}
        for p_id, replicas in six.iteritems(topic_data['partitions']):
            entry = {}
            if fetch_partition_state:
                # Fetch partition-state from zookeeper
                state = self._fetch_partition_state(topic_id, p_id)
                entry = load_json(state[0])
                entry['ctime'] = state[1].ctime / 1000.0
            else:
                # Fetch partition-info from zookeeper
                info = self._fetch_partition_info(topic_id, p_id)
                entry['ctime'] = info.ctime / 1000.0
            entry['replicas'] = replicas
            partition_entries[p_id] = entry
        topic_data['partitions'] = partition_entries
        topics_data[topic_id] = topic_data
    return topics_data