def get_connection(self):
    if not self.bootstrap_server:
        self.app.logger.info(
            "The connection to Kafka is disabled. Check the '.env' file!")
        return True  # True is needed to run in platform-only mode
    try:
        # Check the connectivity and update Kafka topics
        self.system_topics = self.k_admin_client.list_topics(
            timeout=3.0).topics
        # Create PLATFORM_TOPIC if it doesn't exist yet
        if not self.system_topics.get(PLATFORM_TOPIC, None):
            self.k_admin_client.create_topics(
                [kafka_admin.NewTopic(PLATFORM_TOPIC, 3, 1)])
            self.app.logger.info(
                "Created platform logging topic with name '{}'.".format(
                    PLATFORM_TOPIC))
        self.app.logger.debug(
            "Connected to Kafka Bootstrap Servers '{}'.".format(
                self.bootstrap_server))
        return True
    except cimpl.KafkaException:
        self.app.logger.error("Couldn't connect to the Kafka bootstrap servers.")
        self.app.logger.error(
            "Check the Kafka Bootstrap Servers '{}'!".format(
                self.bootstrap_server))
        return False
def makeConsumer(self, TopicIn, consumerID,
                 topicConfig=None, randomID=False, topicParams=(1, 1)):
    # Set up the consumer; avoid mutable default arguments
    if topicConfig is None:
        topicConfig = {'auto.offset.reset': 'earliest'}
    if randomID:
        consumerID = consumerID + '{:04d}'.format(random.randint(0, 9999))
    finTopicIn = self.topicBasename + '.' + TopicIn
    print('Consumer Topic: {}'.format(finTopicIn))
    if finTopicIn not in self.topics:
        print('Warning: Consumer created the topic: {}'.format(finTopicIn))
        new_topic = admin.NewTopic(finTopicIn, topicParams[0], topicParams[1])
        self.kafka_admin.create_topics([new_topic])
    # Copy the shared connection parameters so they aren't mutated in place
    params = dict(self.connectParams)
    # Change group.id to restart from the first/newest message
    params['group.id'] = consumerID
    params['default.topic.config'] = topicConfig
    consumer = Consumer(params)
    consumer.subscribe([finTopicIn])
    consumer.poll(0.1)
    return finTopicIn, consumer
def create_topic(topics):
    availableTopics = adminClient.list_topics().topics
    # Delete topics that already exist so they can be recreated from scratch
    topicsToDelete = [topic for topic in topics if topic in availableTopics]
    delete_topic(topicsToDelete)
    time.sleep(0.1)
    newTopics = [ckAdmin.NewTopic(topic, 1, 1) for topic in topics]
    fs = adminClient.create_topics(newTopics)
    print(fs)
    exceptions = []
    for topic, f in fs.items():
        try:
            f.result()
            print("Topic {} created".format(topic))
        except Exception as e:
            exceptions.append("Failed to create topic {}: {}".format(topic, e))
    if exceptions:
        raise Exception('\n'.join(exceptions))
def create_topic(topic, config, num_partitions=1):
    """
    Builds a NewTopic specification from the given data. Note that this only
    constructs the spec; it must still be passed to AdminClient.create_topics()
    to actually create the topic.
    :param topic: string
    :param config: dict
    :param num_partitions: int (must be at least 1)
    :return: admin.NewTopic
    """
    return admin.NewTopic(topic=topic,
                          num_partitions=num_partitions,
                          config=config)
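# A minimal usage sketch (not from the original source): since the helper above
# only builds the NewTopic spec, a caller still has to submit it through an
# AdminClient. The bootstrap address and topic settings below are assumptions.
from confluent_kafka import admin

admin_client = admin.AdminClient({'bootstrap.servers': 'localhost:9092'})  # assumed address
spec = create_topic('example-topic', {'retention.ms': '3600000'}, num_partitions=1)
for name, future in admin_client.create_topics([spec]).items():
    future.result()  # raises on failure, returns None on success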
def to_stream_dir(path_dir, kafka_client):
    producer = Producer(KAFKA_CONFIG)
    for file in os.listdir(path_dir):
        if file.endswith(".tar.gz"):
            topic_name = file.split(".")[0]
            new_topic = admin.NewTopic(topic_name, 1, 1)
            kafka_client.create_topics([new_topic])
            logging.info(f"Creating topic {topic_name}")
            to_stream_tar(file, producer, topic_name)
def create_system_topics(self, system_name):
    # Create the set of Kafka topics for system_name
    if self.bootstrap_server is None:
        self.app.logger.warning(
            "Skipped creating system topics because platform-only mode is used.")
        return None
    self.app.logger.debug("Creating Kafka topics for the new system.")
    # Create system topics
    self.k_admin_client.create_topics([
        # TODO: set num_partitions to 1
        kafka_admin.NewTopic(system_name + ".log",
                             num_partitions=3, replication_factor=1),
        kafka_admin.NewTopic(system_name + ".int",
                             num_partitions=3, replication_factor=1),
        kafka_admin.NewTopic(system_name + ".ext",
                             num_partitions=3, replication_factor=1)
    ])
    self.app.logger.info(
        "Created system topics for '{}'.".format(system_name))
def makeProducer(self, TopicOut, topicParams=(1, 1)):
    # Set up the producer
    finTopicOut = self.topicBasename + '.' + TopicOut
    print('Producer Topic: {}'.format(finTopicOut))
    if finTopicOut not in self.topics:
        print('Creating topic: {}'.format(finTopicOut))
        new_topic = admin.NewTopic(finTopicOut, topicParams[0], topicParams[1])
        self.kafka_admin.create_topics([new_topic])
    return finTopicOut, Producer(self.connectParams)
def testTopic(self, Topic, topicParams=(1, 1)):
    if not self.connectParams:
        print('Call setConnection first!')
        return False
    # Refresh the topic list
    self.topics = self.kafka_admin.list_topics().topics
    if Topic not in self.topics:
        print('Creating topic: {}'.format(Topic))
        new_topic = admin.NewTopic(Topic, topicParams[0], topicParams[1])
        self.kafka_admin.create_topics([new_topic])
    return True
def create_topic(cfg: config.Config):
    """Ensure that the required Kafka topic exists."""
    # based on code in https://github.com/confluentinc/confluent-kafka-python
    client = admin.AdminClient(cfg.get_kafka_config())
    topic = admin.NewTopic(cfg.kafka_topic,
                           num_partitions=3,
                           replication_factor=1)
    topic_map = client.create_topics([topic])
    for topic, future in topic_map.items():
        try:
            future.result()
            logger.info('Topic %s created', topic)
        except Exception as err:
            if err.args[0].code() != kafka.KafkaError.TOPIC_ALREADY_EXISTS:
                raise
            logger.debug('Topic %s already exists', topic)
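# Hedged usage sketch: create_topic() above expects a config object that exposes
# get_kafka_config() and a kafka_topic attribute. The stand-in class below is an
# illustrative assumption, not the project's real config.Config.
class DemoConfig:
    kafka_topic = 'demo-topic'  # assumed topic name

    def get_kafka_config(self):
        return {'bootstrap.servers': 'localhost:9092'}  # assumed broker address

create_topic(DemoConfig())  # idempotent: TOPIC_ALREADY_EXISTS is swallowed on reruns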
def test_topic_creation():
    """This method recreates the test topics."""
    print("Recreate test topics.")
    topic_config = {"retention.ms": 3600000}  # store for one hour only
    res_dict = k_admin_client.create_topics([
        kafka_admin.NewTopic(topic, num_partitions=1, replication_factor=1,
                             config=topic_config)
        for topic in [KAFKA_TOPIC_IN_0, KAFKA_TOPIC_IN_1, KAFKA_TOPIC_OUT]])
    # Wait for each operation to finish.
    for topic, f in res_dict.items():
        try:
            f.result()  # The result itself is None
            print(f"Topic '{topic}' created")
        except Exception as e:
            print(f"Topic '{topic}' couldn't be created: {e}")
    k_admin_client.poll(3.1)  # small timeout for synchronizing
    topics = k_admin_client.list_topics(timeout=3.0).topics
    assert KAFKA_TOPIC_IN_0 in topics
    assert KAFKA_TOPIC_IN_1 in topics
    assert KAFKA_TOPIC_OUT in topics
def check_topic_existence(connection, topic):
    """Check whether the topic exists and create it if needed."""
    chk_topic = connection.list_topics().topics
    if chk_topic.get(topic):
        print("\nINFO: Topic %s exists." % topic)
    else:
        print("\nINFO: Creating the topic %s" % topic)
        # NewTopic specifies per-topic settings for passing to AdminClient.create_topics().
        setTopic = admin.NewTopic(topic, num_partitions=1, replication_factor=1)
        fs = connection.create_topics([setTopic], request_timeout=10)
        for t, f in fs.items():
            try:
                f.result()  # The result itself is None
                print("Topic {} created".format(t))
            except KafkaException as e:
                print("Failed to create topic {}: {}".format(t, e))
                sys.exit()
def create_topics(self):
    topic_list = []
    for topic_name in self.conf.topics:
        topic = self.conf.topics[topic_name]
        print(topic)
        topic_list.append(
            kad.NewTopic(topic.name, topic.num_partitions,
                         topic.replication_factor))
    # create_topics() takes a list, so multiple topics can be created in one call.
    a = self.admin.create_topics(topic_list)
    sleep(3)
    self.list_topics()
def create_topics(self, topics, number_of_partitions=3, replication_factor=1):
    new_topics = [
        admin.NewTopic(topic,
                       num_partitions=number_of_partitions,
                       replication_factor=replication_factor)
        for topic in topics
    ]
    # Call create_topics to asynchronously create topics; a dict
    # of <topic, future> is returned.
    fs = self.admin_client.create_topics(new_topics)
    # Wait for each operation to finish. Timeouts are preferably controlled
    # by passing request_timeout=15.0 to the create_topics() call.
    # All futures will finish at the same time.
    for topic, f in fs.items():
        try:
            f.result()  # The result itself is None
            print("Topic {} created".format(topic))
        except Exception as e:
            print("Failed to create topic {}: {}".format(topic, e))
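# Hedged usage sketch (instance and topic names are assumptions): one call
# creates all topics in a single request, and each topic reports its own
# success or failure through its future, so a duplicate or otherwise failing
# topic doesn't block the remaining ones.
#
#   manager.create_topics(["orders", "payments"], number_of_partitions=1)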
def verify_admin():
    """ Verify Admin API """
    a = admin.AdminClient({'bootstrap.servers': bootstrap_servers})
    our_topic = topic + '_admin_' + str(uuid.uuid4())
    num_partitions = 2
    topic_config = {"compression.type": "gzip"}

    # First iteration: validate our_topic creation.
    # Second iteration: create topic.
    for validate in (True, False):
        fs = a.create_topics([admin.NewTopic(our_topic,
                                             num_partitions=num_partitions,
                                             config=topic_config,
                                             replication_factor=1)],
                             validate_only=validate,
                             operation_timeout=10.0)
        for topic2, f in fs.items():
            f.result()  # trigger exception if there was an error

    # Find the topic in list_topics
    verify_topic_metadata(a, {our_topic: num_partitions})

    # Increase the partition count
    num_partitions += 3
    fs = a.create_partitions([admin.NewPartitions(our_topic,
                                                  new_total_count=num_partitions)],
                             operation_timeout=10.0)
    for topic2, f in fs.items():
        f.result()  # trigger exception if there was an error

    # Verify with list_topics.
    verify_topic_metadata(a, {our_topic: num_partitions})

    def verify_config(expconfig, configs):
        """
        Verify that the config key/values in expconfig are found
        and match the ConfigEntry in configs.
        """
        for key, expvalue in expconfig.items():
            entry = configs.get(key, None)
            assert entry is not None, \
                "Config {} not found in returned configs".format(key)
            assert entry.value == str(expvalue), \
                "Config {} with value {} does not match expected value {}".format(
                    key, entry, expvalue)

    # Get the current topic config
    resource = admin.ConfigResource(admin.RESOURCE_TOPIC, our_topic)
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify that the config matches our expectations
    verify_config(topic_config, configs)

    # Now change the config.
    topic_config["file.delete.delay.ms"] = 12345
    topic_config["compression.type"] = "snappy"
    for key, value in topic_config.items():
        resource.set_config(key, value)
    fs = a.alter_configs([resource])
    fs[resource].result()  # will raise exception on failure

    # Read the config back again and verify.
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify that the config matches our expectations
    verify_config(topic_config, configs)

    # Delete the topic
    fs = a.delete_topics([our_topic])
    fs[our_topic].result()  # will raise exception on failure
    print("Topic {} marked for deletion".format(our_topic))
def to_stream_dir(kafka_client):
    topic_name = "MgC-Topic"
    new_topic = admin.NewTopic(topic_name, 1, 1)
    kafka_client.create_topics([new_topic])
    logging.info(f"Creating topic {topic_name}")
    return topic_name
    # (snippet truncated: the enclosing request function's header is not shown)
    data = res.json()
    return data


with open(os.path.join(WORKING_DIRECTORY, 'city.list.json')) as json_file:
    cities = json.load(json_file)

chile = []
for index, city in enumerate(cities):
    if city["country"] == "CL":
        chile.append(city)
logging.info(f"Total cities {len(chile)}")

calls = 0
kafka_client = admin.AdminClient(KAFKA_CONFIG)
new_topic = admin.NewTopic("weather", 1, 1)
kafka_client.create_topics([new_topic])
producer = Producer(KAFKA_CONFIG)

while True:
    for index, city in enumerate(chile):
        calls += 1
        query = current_city(city['id'], api_key)
        if query["cod"] != 200:
            logging.error(f"Error in {index} {city}")
            break
        else:
            logging.info(f"calls: {calls} city: {city['name']}")
            producer.produce("weather", pickle.dumps(query))
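# Note (an assumption, not part of the original script): confluent_kafka.Producer
# buffers messages in an internal queue, so a long-running produce loop like the
# one above usually also services delivery reports and flushes before exit:
#
#   producer.poll(0)   # inside the loop: serve queued delivery callbacks
#   producer.flush()   # at shutdown: block until outstanding messages are sent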
def verify_admin():
    """ Verify Admin API """
    a = admin.AdminClient({'bootstrap.servers': bootstrap_servers})
    our_topic = topic + '_admin_' + str(uuid.uuid4())
    num_partitions = 2
    topic_config = {"compression.type": "gzip"}

    # First iteration: validate our_topic creation.
    # Second iteration: create topic.
    for validate in (True, False):
        fs = a.create_topics([admin.NewTopic(our_topic,
                                             num_partitions=num_partitions,
                                             config=topic_config,
                                             replication_factor=1)],
                             validate_only=validate,
                             operation_timeout=10.0)
        for topic2, f in fs.items():
            f.result()  # trigger exception if there was an error

    # Find the topic in list_topics
    verify_topic_metadata(a, {our_topic: num_partitions})

    # Increase the partition count
    num_partitions += 3
    fs = a.create_partitions([admin.NewPartitions(our_topic,
                                                  new_total_count=num_partitions)],
                             operation_timeout=10.0)
    for topic2, f in fs.items():
        f.result()  # trigger exception if there was an error

    # Verify with list_topics.
    verify_topic_metadata(a, {our_topic: num_partitions})

    # Verify with list_groups.

    # Produce some messages
    p = confluent_kafka.Producer({"bootstrap.servers": bootstrap_servers})
    p.produce(our_topic, 'Hello Python!', headers=produce_headers)
    p.produce(our_topic, key='Just a key and headers', headers=produce_headers)

    def consume_messages(group_id):
        # Consume messages
        conf = {'bootstrap.servers': bootstrap_servers,
                'group.id': group_id,
                'session.timeout.ms': 6000,
                'enable.auto.commit': False,
                'on_commit': print_commit_result,
                'error_cb': error_cb,
                'auto.offset.reset': 'earliest',
                'enable.partition.eof': True}
        c = confluent_kafka.Consumer(conf)
        c.subscribe([our_topic])
        eof_reached = dict()
        while True:
            msg = c.poll()
            if msg is None:
                raise Exception(
                    'Got timeout from poll() without a timeout set: %s' % msg)
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    print('Reached end of %s [%d] at offset %d' % (
                        msg.topic(), msg.partition(), msg.offset()))
                    eof_reached[(msg.topic(), msg.partition())] = True
                    if len(eof_reached) == len(c.assignment()):
                        print('EOF reached for all assigned partitions: exiting')
                        break
                else:
                    print('Consumer error: %s: ignoring' % msg.error())
                    break
            # Commit offset
            c.commit(msg, asynchronous=False)

    group1 = 'test-group-1'
    group2 = 'test-group-2'

    consume_messages(group1)
    consume_messages(group2)

    # list_groups without group argument
    groups = set(group.id for group in a.list_groups(timeout=10))
    assert group1 in groups, "Consumer group {} not found".format(group1)
    assert group2 in groups, "Consumer group {} not found".format(group2)

    # list_groups with group argument
    groups = set(group.id for group in a.list_groups(group1))
    assert group1 in groups, "Consumer group {} not found".format(group1)
    groups = set(group.id for group in a.list_groups(group2))
    assert group2 in groups, "Consumer group {} not found".format(group2)

    def verify_config(expconfig, configs):
        """
        Verify that the config key/values in expconfig are found
        and match the ConfigEntry in configs.
        """
        for key, expvalue in expconfig.items():
            entry = configs.get(key, None)
            assert entry is not None, \
                "Config {} not found in returned configs".format(key)
            assert entry.value == str(expvalue), \
                "Config {} with value {} does not match expected value {}".format(
                    key, entry, expvalue)

    # Get the current topic config
    resource = admin.ConfigResource(admin.RESOURCE_TOPIC, our_topic)
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify that the config matches our expectations
    verify_config(topic_config, configs)

    # Now change the config.
    topic_config["file.delete.delay.ms"] = 12345
    topic_config["compression.type"] = "snappy"
    for key, value in topic_config.items():
        resource.set_config(key, value)
    fs = a.alter_configs([resource])
    fs[resource].result()  # will raise exception on failure

    # Read the config back again and verify.
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify that the config matches our expectations
    verify_config(topic_config, configs)

    # Delete the topic
    fs = a.delete_topics([our_topic])
    fs[our_topic].result()  # will raise exception on failure
    print("Topic {} marked for deletion".format(our_topic))
def enqueue(self, filename):
    """
    Try to post a message to the pre-defined Kafka topic.
    :param filename:
    :return status: False or True
    """
    status = False
    if filename is None or len(filename) == 0:
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: filename is None or invalid")
        return status
    if self.producer_instance is None:
        logging_to_console_and_syslog(
            "KafkaMsgQAPI: Producer instance is None. Trying to create one...")
        if not self.__producer_connect():
            logging_to_console_and_syslog("Unable to create producer instance.")
            return status

    if not self.is_topic_created:
        try:
            if self.producer_instance.list_topics(self.topic, timeout=1.0):
                logging_to_console_and_syslog(
                    "Found topic name = {} in the cluster metadata.".format(
                        self.topic))
                self.is_topic_created = True
        except KafkaException:
            kafka_admin_client = admin.AdminClient(self.producer_conf)
            logging_to_console_and_syslog("Creating topic {}.".format(self.topic))
            ret = kafka_admin_client.create_topics(
                new_topics=[admin.NewTopic(topic=self.topic, num_partitions=1)],
                operation_timeout=1.0)
            logging_to_console_and_syslog("ret = {}".format(ret))

    # Asynchronously produce a message. The delivery report callback will be
    # triggered from poll() or flush() below when the message has been
    # successfully delivered or has failed permanently.
    logging_to_console_and_syslog(
        "ConfluentKafkaMsgQAPI: Posting filename={} into "
        "kafka broker={}, topic={}".format(filename, self.broker_name, self.topic))
    value = filename.encode('utf-8')
    try:
        # Produce line (without newline)
        self.producer_instance.produce(
            self.topic, value,
            callback=ConfluentKafkaMsgQAPI.delivery_callback)
        status = True
    except BufferError:
        sys.stderr.write('%% Local producer queue is full '
                         '(%d messages awaiting delivery): try again\n' %
                         len(self.producer_instance))
        status = False
    except Exception:
        print("ConfluentKafkaMsgQAPI: Exception in user code:")
        print("-" * 60)
        traceback.print_exc(file=sys.stdout)
        print("-" * 60)
        status = False
    else:
        event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                "kafka broker={}, topic={}.".format(filename,
                                                    self.broker_name,
                                                    self.topic)
        logging_to_console_and_syslog(event)

    # Serve the delivery callback queue. NOTE: Since produce() is an
    # asynchronous API, this poll() call will most likely not serve the
    # delivery callback for the last produce()d message yet.
    self.producer_instance.poll(timeout=0.1)
    # Wait until all outstanding messages have been delivered and their
    # delivery report callbacks have been triggered.
    self.producer_instance.flush(timeout=0.1)
    return status
import json
import logging
import pickle

from confluent_kafka import Producer, admin
from sseclient import SSEClient  # assumed: SSE client used for the pushshift stream

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(name)s.%(funcName)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')

STREAM_PATH = "http://stream.pushshift.io/?type=submissions"
KAFKA_CONFIG = {"bootstrap.servers": "localhost:9092"}


def process_message(msg, topic, producer):
    logging.info(f"event: {msg.event} id: {msg.id}")
    data = msg.data
    json_data = json.loads(data)
    producer.produce(topic, pickle.dumps(json_data))


if __name__ == "__main__":
    kafka_client = admin.AdminClient(KAFKA_CONFIG)
    queue = SSEClient(STREAM_PATH)
    new_topic = admin.NewTopic("reddit", 1, 1)
    kafka_client.create_topics([new_topic])
    producer = Producer(KAFKA_CONFIG)
    for msg in queue:
        try:
            process_message(msg, "reddit", producer)
        except Exception as e:
            print(e)
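# Hedged companion sketch: the script above produces pickle.dumps(...) payloads,
# so a reader must pickle.loads() them back. The group id and poll timeout below
# are assumptions for illustration.
import pickle

from confluent_kafka import Consumer

consumer = Consumer({"bootstrap.servers": "localhost:9092",
                     "group.id": "reddit-reader",  # assumed group id
                     "auto.offset.reset": "earliest"})
consumer.subscribe(["reddit"])
try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print(msg.error())
            continue
        submission = pickle.loads(msg.value())  # dict decoded from the SSE payload
        print(submission.get("id"))
finally:
    consumer.close()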