def test_basic_api():
    """Basic API tests; these won't really do anything since there is
    no broker configured."""
    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)
    p.poll(0.001)
    p.flush(0.002)
    p.flush()

    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
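# The error_cb referenced above is not shown in this excerpt; a minimal
# sketch of its shape (purely an assumption) might be:
def error_cb(err):
    # With no broker configured, expect connection-level errors here,
    # e.g. _TRANSPORT or _ALL_BROKERS_DOWN.
    print('error_cb', err)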
class KafkaProducer(object):
    """Producer."""

    def __init__(self, kafka_url, topic):
        self.__kafka_url = kafka_url
        self.__topic = topic
        self.producer = Producer({
            'bootstrap.servers': self.__kafka_url,
            'log.connection.close': False,
            'request.required.acks': 0,
            'queue.buffering.max.ms': 5000,
            'queue.buffering.max.messages': 10000,
            'batch.num.messages': 200
        })
        self.create_topic()
        self.__partitions = self.producer.list_topics().topics[self.__topic].partitions

    def create_topic(self, num_partitions=3, replication_factor=1):
        if self.__topic not in self.producer.list_topics().topics:
            ac = AdminClient({'bootstrap.servers': self.__kafka_url})
            futmap = ac.create_topics(
                [NewTopic(self.__topic, num_partitions, replication_factor)])
            # crude wait for topic creation to propagate
            time.sleep(2)

    def send_log(self, err, msg):
        pass

    def get_target_partition_id(self, key):
        # Note: Python's built-in hash() is randomized per process unless
        # PYTHONHASHSEED is fixed, so this mapping is not stable across runs.
        return hash(key) % len(self.__partitions)

    def send(self, data, key):
        target_partition_id = self.get_target_partition_id(key)
        self.producer.produce(self.__topic,
                              json.dumps(data).encode('utf-8'),
                              partition=target_partition_id,
                              callback=self.send_log)

    def flush(self):
        self.producer.flush()
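# A minimal usage sketch; the broker address and topic name are assumptions,
# not part of the original code:
producer = KafkaProducer('localhost:9092', 'example_topic')
producer.send({'event': 'signup', 'user': 42}, key='user-42')
producer.flush()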
def test_create_kafka_topic(self):
    # The topic was already created in the CircleCI setup (config.yml step 6).
    # Just checking that it's still up.
    conf = kafka_utils.read_config('producer_google_chicago_1.config',
                                   'producer_google_chicago_test')
    print("config is", conf)
    producer_config = {'bootstrap.servers': conf['bootstrap.servers']}
    p = Producer(producer_config)
    info = p.list_topics()
    topic = info.topics['christian_test'].topic
    partitions = info.topics['christian_test'].partitions
    partition_ids = list(partitions.keys())
    result = (topic, len(partition_ids))
    assert result == ('christian_test', 1)
def get_cluster_metadata(bootstrap_servers):
    """
    Return cluster metadata for the cluster specified by bootstrap_servers.

    Parameters
    ----------
    bootstrap_servers : str
        comma-delimited string of Kafka broker host:port,
        for example "localhost:9092"

    Returns
    -------
    confluent_kafka.admin.ClusterMetadata
    """
    kafka_producer = Producer({"bootstrap.servers": bootstrap_servers})
    cluster_metadata = kafka_producer.list_topics()
    return cluster_metadata
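# Usage sketch (broker address is an assumption). The returned
# ClusterMetadata exposes .topics and .brokers dictionaries:
metadata = get_cluster_metadata("localhost:9092")
for name, topic_md in metadata.topics.items():
    print(name, len(topic_md.partitions), "partition(s)")
for broker_id, broker_md in metadata.brokers.items():
    print(broker_id, broker_md)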
def create_topics(self, topics: List[Tuple[str, int, int]]) -> None:
    """Creates a list of kafka topics.

    :param topics: List of tuples where:
        1st element: name of the topic to create
        2nd element: number of partitions
        3rd element: number of replicas in the cluster
    """
    producer = Producer(
        {'bootstrap.servers': svt.conf.get('kafka', 'bootstrap_servers')})
    existing_topics = producer.list_topics().topics
    # keep only topics that do not exist yet, comparing by name, and
    # preserve the requested partition/replica counts
    new_topics = [t for t in topics if t[0] not in existing_topics]
    nts = [NewTopic(name, partitions, replicas)
           for name, partitions, replicas in new_topics]
    if nts:
        self.admin.create_topics(nts)
        log.info(f"Created topics: {topics}")
    else:
        log.info(f"Topics: {topics} already exist!")
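# Example call with (name, partitions, replicas) tuples; the manager object
# and topic names are hypothetical:
manager.create_topics([
    ("orders", 3, 1),    # 3 partitions, replication factor 1
    ("payments", 1, 1),
])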
def _produce(self, key: str, value: str, headers: dict,
             payload: AsyncProducerPayload) -> None:
    config = {'bootstrap.servers': self.actor.service.address}
    if self.actor.service.ssl:
        config['security.protocol'] = 'SSL'
    producer = Producer(config)

    if payload.enable_topic_creation:
        topics = producer.list_topics(self.topic)
        if topics.topics[self.topic].error is not None:
            _create_topic(self.actor.service.address,
                          self.topic,
                          ssl=self.actor.service.ssl)

    producer.poll(0)
    producer.produce(self.topic, value, key=key, headers=headers,
                     callback=_kafka_delivery_report)
    producer.flush()
def main():
    args = get_args()
    producer = Producer({
        'bootstrap.servers': args.brokers,
        'client.id': socket.gethostname()
    })

    if args.operation is Operation.PRODUCE:
        for topic in args.topics.split(','):
            # bind the current topic as a default argument; a late-binding
            # closure would make every callback see only the last topic
            producer.produce(
                topic=topic.strip(),
                value=args.message,
                callback=lambda err, msg, topic=topic: message_ack(err, msg, topic))
        producer.poll(5)

    if args.operation is Operation.LIST_TOPICS:
        print(stdiocolours.OKBLUE + "\nTOPICS:" + stdiocolours.ENDC)
        for count, topic in enumerate(producer.list_topics().topics, start=1):
            print(str(count) + ":", topic)

    if args.operation is Operation.WATCH_PRODUCE:
        watch_dir()
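# get_args() and Operation are defined elsewhere in this project; a plausible
# sketch of their shape (names and defaults are assumptions):
import argparse
import enum


class Operation(enum.Enum):
    PRODUCE = 'produce'
    LIST_TOPICS = 'list-topics'
    WATCH_PRODUCE = 'watch-produce'


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--brokers', default='localhost:9092')
    parser.add_argument('--topics', default='')
    parser.add_argument('--message', default='')
    parser.add_argument('operation', type=Operation, choices=list(Operation))
    return parser.parse_args()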
class FanIn(AgentCommon):
    registered = False
    loggerName = None

    # All time is in seconds as float. We use time_ns to get highest resolution.
    timet0 = 0
    t0_on_first_mqtt = 0
    MsgCount = 0

    def __init__(self, configFile, debug, encrypt, TopicForThisProcess=False,
                 batching=False, mqttcounter=False, mqttPassthrough=False):
        """Class init"""
        self.sensors = []
        self.mqttTopicList = []
        self.loggerName = "simulator.agent." + __version__ + ".log"

        self.config = self.checkConfigurationFile(
            configFile, ["Daemon", "Logger", "Kafka", "MQTT"])

        self.kafka_broker = self.config.get("Kafka", "kafka_broker")
        self.kafka_port = int(self.config.get("Kafka", "kafka_port"))
        self.kafkaProducerTopic = self.config.get("Kafka", "kafkaProducerTopic")
        self.kafka_security_protocol = self.config.get(
            "Kafka", "kafka_security.protocol")
        self.kafka_sasl_mechanisms = self.config.get("Kafka", "kafka_sasl.mechanisms")
        self.kafka_sasl_username = self.config.get("Kafka", "kafka_sasl.username")
        self.kafka_sasl_password = self.config.get("Kafka", "kafka_sasl.password")
        self.kafka_ssl_ca_location = self.config.get("Kafka", "kafka_ssl.ca.location")
        self.kafka_ssl_certificate_location = self.config.get(
            "Kafka", "kafka_ssl.certificate.location")
        self.kafka_ssl_key_location = self.config.get(
            "Kafka", "kafka_ssl.key.location")
        self.kafka_ssl_key_password = self.config.get(
            "Kafka", "kafka_ssl.key.password")

        self.myFanIn_mqtt_encryption_enabled = encrypt

        self.mqtt_broker = self.config.get("MQTT", "mqtt_broker")
        self.mqtt_port = int(self.config.get("MQTT", "mqtt_port"))
        self.mqttBatching = batching
        self.enableMQTTbatchCount = mqttcounter
        self.enableMQTTpassthrough = mqttPassthrough

        # create topic list: [("topicName1", int(qos1)), ("topicName2", int(qos2))]
        # e.g. [("ibswitch", 0), ("redfish", 0)]
        addValue = []

        if TopicForThisProcess is not False:
            # multi-process version
            value = TopicForThisProcess.split(":")
            addValue.append(value[0])
            addValue.append(int(value[1]))

            self.processID = os.getpid()
            self.threadID = threading.get_ident()
            self.logMPMT = "P-{:d} | T-{:d} |".format(self.processID, self.threadID)

            if not self.enableMQTTpassthrough:
                print("MULTIPROC {:s} Starting FanIn Gateway in its process for {:s}."
                      " MQTT batch pass-through is DISABLED."
                      .format(self.logMPMT, TopicForThisProcess))
            else:
                print("MULTIPROC {:s} Starting FanIn Gateway in its process for {:s}."
                      " MQTT batch pass-through is ENABLED."
                      .format(self.logMPMT, TopicForThisProcess))

            self.mqttTopicList.append(addValue)
            self.processID = os.getpid()
            self.myFanInGatewayName = "FanIn-test[" + str(self.processID) + "]"
        else:
            # single-threaded version
            value = self.config.get("MQTT", "mqttSingleThreadTopic").split(":")
            addValue.append(value[0])
            addValue.append(int(value[1]))
            self.mqttTopicList.append(addValue)
            self.myFanInGatewayName = "FanIn-test"

            addValue = []
            # option name is spelled this way in the configuration file
            value = self.config.get("MQTT", "mqttRegistrayionResultTopic").split(":")
            addValue.append(value[0])
            addValue.append(int(value[1]))
            self.mqttTopicList.append(addValue)

        self.bootstrapServerStr = self.kafka_broker + ":" + str(self.kafka_port)

        # Register to the framework
        self.myFanInGateway_id = -1
        self.myFanInGateway_debug = debug
        self.myFanInGateway_uuid = str(uuid.uuid4())
        self.myFanInGateway_uid = self.myFanInGatewayName + str(
            random.randint(1, 100001))

        # for thread-safe counter
        self.myFanInGateway_threadLock = threading.Lock()

        self.myMQTTregistered = False
        self.kafka_producer = None
        self.kafka_consumer = None
        self.kafka_msg_counter = 1
        self.kafka_msg_ack_received = 0

        super().__init__(configFile, debug)

        # message counter per uuid
        self.myMQTTtopicCounterPerUUID = {}

    def resetLogLevel(self, logLevel):
        """Resets the log level"""
        self.logger = KrakenMareLogger().getLogger(self.loggerName, logLevel)

    #######################################################################################
    # MQTT agent methods

    # Sends MQTT messages to Kafka (in batches).
    # TODO: do we need multiple threads here?
    # TODO: have a processing method per client type OR topic for each sensor
    #       type to convert messages?
    def mqtt_on_message(self, client, userdata, message):
        if self.myFanInGateway_debug:
            print("mqtt_on_message start")
        query_data = []
        k = 0

        if message.topic == self.mqttTopicList[0][0]:
            self.done = False
            if self.timet0 == 0:
                self.timet0 = time.time_ns() / 1000000000
            if self.t0_on_first_mqtt == 0:
                self.t0_on_first_mqtt = time.time_ns() / 1000000000

            # if passthrough is enabled, send the MQTT batch directly to Kafka
            if not self.enableMQTTpassthrough:
                if self.mqttBatching:
                    query_data = self.msg_serializer.decode_message(message.payload)
                else:
                    query_data.append(message.payload)

                for data in query_data["tripletBatch"]:
                    # check if I know the agent UUID and adjust my MQTT topic
                    # counter accordingly
                    if self.enableMQTTbatchCount:
                        try:
                            # if the current batch count is exactly one smaller
                            # than the SEND count, everything is in order
                            counter = self.myMQTTtopicCounterPerUUID[data["sensorUuid"]]
                            if (counter != int(data["sensorValue"]) - 1
                                    and counter - int(data["sensorValue"]) != 0):
                                print("ATTENTION: Missing # of MQTTbatches for agent UUID: "
                                      + str(data["sensorUuid"])
                                      + " and topic: " + str(self.mqttTopicList[0][0])
                                      + " is: "
                                      + str(int(data["sensorValue"])
                                            - self.myMQTTtopicCounterPerUUID[data["sensorUuid"]]))
                            self.myMQTTtopicCounterPerUUID[data["sensorUuid"]] = int(
                                data["sensorValue"])
                            if self.myFanInGateway_debug:
                                logMPMT = str("P-{:d} : ".format(os.getpid()))
                                print(logMPMT + self.mqttTopicList[0][0]
                                      + "| UUID: " + str(data["sensorUuid"])
                                      + "| MQTT batch count: "
                                      + str(self.myMQTTtopicCounterPerUUID[data["sensorUuid"]]))
                        except KeyError:
                            # first message seen from this UUID
                            self.myMQTTtopicCounterPerUUID[data["sensorUuid"]] = int(
                                data["sensorValue"])
                            if int(data["sensorValue"]) != 1:
                                print("ATTENTION: Missing # of MQTTbatches for agent UUID: "
                                      + str(data["sensorUuid"])
                                      + " and topic: " + str(self.mqttTopicList[0][0])
                                      + " is: " + str(int(data["sensorValue"]) - 1))

                    try:
                        # print(str(data["sensorUuid"]) + ", " + str(data["sensorValue"]))
                        raw_bytes = self.msg_serializer.encode_record_with_schema_id(
                            self.send_time_series_schema_id, data)
                        self.kafka_producer.produce(
                            self.kafkaProducerTopic,
                            raw_bytes,
                            on_delivery=self.kafka_producer_on_delivery,
                        )
                        self.kafka_msg_counter += 1
                        k += 1

                        if self.myFanInGateway_debug:
                            print(str(self.kafka_msg_counter) + ":published to Kafka")

                        if self.kafka_msg_counter % 1000 == 0:
                            deltat = time.time_ns() / 1000000000 - self.timet0
                            deltaMsg = self.kafka_msg_counter - self.MsgCount
                            self.MsgCount = self.kafka_msg_counter
                            self.timet0 = time.time_ns() / 1000000000
                            elapsed = int(time.time_ns() / 1000000000
                                          - self.t0_on_first_mqtt)
                            logMPMT = "{:d} secs | Process-{:d} | Thread-{:d} | TopicMqtt-{:s}".format(
                                elapsed,
                                os.getpid(),
                                threading.get_ident(),
                                str(message.topic),
                            )
                            print(logMPMT + " | " + str(self.kafka_msg_counter)
                                  + " messages published to Kafka, rate = {:.2f} msg/sec"
                                  .format(deltaMsg / deltat))
                    except BufferError as e1:
                        print("%% Local producer queue is full "
                              "(%d messages awaiting delivery): try again\n"
                              % len(self.kafka_producer))
                        print(e1)
                    except KafkaException as e2:
                        print("MQTT message not published to Kafka! Cause is ERROR:")
                        print(e2)

                # 47 is the expected sample count per batch in this setup
                if k != 47:
                    print("Samples in last processed message was: " + str(k))
            else:
                # passthrough
                try:
                    # print(str(data["sensorUuid"]) + ", " + str(data["sensorValue"]))
                    self.kafka_producer.produce(
                        self.kafkaProducerTopic,
                        message.payload,
                        on_delivery=self.kafka_producer_on_delivery,
                    )
                    self.kafka_msg_counter += 1
                    if self.myFanInGateway_debug:
                        print(str(self.kafka_msg_counter) + ":published to Kafka")
                    if self.kafka_msg_counter % 1000 == 0:
                        deltat = time.time_ns() / 1000000000 - self.timet0
                        deltaMsg = self.kafka_msg_counter - self.MsgCount
                        self.MsgCount = self.kafka_msg_counter
                        self.timet0 = time.time_ns() / 1000000000
                        elapsed = int(time.time_ns() / 1000000000
                                      - self.t0_on_first_mqtt)
                        logMPMT = "{:d} secs | Process-{:d} | Thread-{:d} | TopicMqtt-{:s}".format(
                            elapsed,
                            os.getpid(),
                            threading.get_ident(),
                            str(message.topic),
                        )
                        print(logMPMT + " | " + str(self.kafka_msg_counter)
                              + " MQTT batch messages published to Kafka, rate = {:.2f} msg/sec"
                              .format(deltaMsg / deltat))
                except BufferError as e1:
                    print("%% Local producer queue is full "
                          "(%d messages awaiting delivery): try again\n"
                          % len(self.kafka_producer))
                    print(e1)
                except KafkaException as e2:
                    print("MQTT message not published to Kafka! Cause is ERROR:")
                    print(e2)

            self.mqttMsgTimer = time.time()
        else:
            if self.myFanInGateway_debug:
                print("Not ibswitch topic")

    # END MQTT agent methods
    #######################################################################################

    #######################################################################################
    # Kafka agent methods

    # Kafka error printer
    def kafka_producer_error_cb(self, err):
        logMPMT = "P-{:d} | T-{:d} |".format(os.getpid(), threading.get_ident())
        print("{:s} KAFKA_PROD_CALLBACK_ERR : {:s}".format(logMPMT, str(err)))

    def kafka_producer_on_delivery(self, err, msg):
        if err:
            print("KAFKA_MESSAGE_CALLBACK_ERR : %% Message failed delivery: "
                  "%s - to %s [%s] @ %s\n"
                  % (err, msg.topic(), str(msg.partition()), str(msg.offset())))
        else:
            self.kafka_msg_ack_received += 1
            if self.myFanInGateway_debug:
                print("%% Message delivered to %s [%d] @ %d\n"
                      % (msg.topic(), msg.partition(), msg.offset()))

    # connect to the Kafka broker as a producer to check topic 'myTopic'
    def kafka_check_topic(self, myTopic):
        print("Connecting as kafka producer to check for topic: " + myTopic)
        test = False

        conf = {
            "bootstrap.servers": self.bootstrapServerStr,
            "client.id": socket.gethostname(),
            "error_cb": self.kafka_producer_error_cb,
            "security.protocol": self.kafka_security_protocol,
            "sasl.mechanisms": self.kafka_sasl_mechanisms,
            "sasl.username": self.kafka_sasl_username,
            "sasl.password": self.kafka_sasl_password,
            "ssl.ca.location": self.kafka_ssl_ca_location,
            "ssl.certificate.location": self.kafka_ssl_certificate_location,
            "ssl.key.location": self.kafka_ssl_key_location,
            "ssl.key.password": self.kafka_ssl_key_password,
        }

        while not test:
            time.sleep(1)
            print("waiting for kafka producer to connect")
            try:
                # shouldn't be used directly: self.kafka_client = kafka.KafkaClient(self.kafka_broker)
                kafka_producer = KafkaProducer(conf)
                kafka_producer.list_topics(topic=myTopic, timeout=1)
                test = True
            except KafkaException:
                # print(e.args[0])
                print("waiting for " + myTopic + " topic...")

    # connect to the Kafka broker as a producer
    def kafka_producer_connect(self):
        test = False

        conf = {
            "bootstrap.servers": self.bootstrapServerStr,
            "client.id": socket.gethostname(),
            "error_cb": self.kafka_producer_error_cb,
            "security.protocol": self.kafka_security_protocol,
            "sasl.mechanisms": self.kafka_sasl_mechanisms,
            "sasl.username": self.kafka_sasl_username,
            "sasl.password": self.kafka_sasl_password,
            "ssl.ca.location": self.kafka_ssl_ca_location,
            "ssl.certificate.location": self.kafka_ssl_certificate_location,
            "ssl.key.location": self.kafka_ssl_key_location,
            "ssl.key.password": self.kafka_ssl_key_password,
            "linger.ms": 1000,
            "message.max.bytes": 2560000,
            "queue.buffering.max.messages": 2000000,
        }

        while not test:
            time.sleep(2)
            print("waiting for kafka producer to connect")
            try:
                # shouldn't be used directly: self.kafka_client = kafka.KafkaClient(self.kafka_broker)
                self.kafka_producer = KafkaProducer(conf)
                self.kafka_producer.list_topics(timeout=1)
                test = True
            except KafkaException as e:
                print(e.args[0])
                print("waiting for Kafka brokers..." + self.bootstrapServerStr)

        print(self.__class__.__name__ + "."
              + inspect.currentframe().f_code.co_name + ": producer connected")

    # END Kafka agent methods
    #######################################################################################

    def signal_handler(self, signal, frame):
        self.mqtt_close()
        sys.exit(0)

    # main method of FanIn
    def run(self):
        # local and debug flags are not used from here at the moment
        # self.kafka_check_topic("registration-result")
        self.kafka_check_topic(self.kafkaProducerTopic)
        # self.mqtt_registration()
        self.kafka_producer_connect()

        # TODO: should be its own process via the Process class
        # (from multiprocessing import Process)

        # generate the list of MQTT topics to subscribe to; used in the
        # initial connection and to re-subscribe on re-connect
        mqttSubscriptionTopics = self.mqttTopicList
        print("MQTT topic list:" + str(self.mqttTopicList))

        # start mqtt client
        myLoopForever = False
        myCleanSession = True
        self.mqtt_init(
            self.myFanInGateway_uuid,
            mqttSubscriptionTopics,
            myLoopForever,
            myCleanSession,
            self.myFanIn_mqtt_encryption_enabled,
        )

        # start listening to data
        # self.mqtt_subscription()

        regularLog = 300
        logMPMT = str("P-{:d} : ".format(os.getpid()))
        self.done = False
        self.mqttMsgTimer = time.time()

        while True:
            time.sleep(0.05)
            self.kafka_producer.poll(0)
            regularLog -= 1
            if regularLog <= 0:
                regularLog = 300
                if (self.mqttMsgTimer + 10 < time.time()
                        and not self.done
                        and self.enableMQTTbatchCount):
                    for uuid in self.myMQTTtopicCounterPerUUID:
                        print(logMPMT + self.mqttTopicList[0][0]
                              + "| UUID: " + str(uuid)
                              + "| MQTT batch count: "
                              + str(self.myMQTTtopicCounterPerUUID[uuid]))
                    self.done = True

        self.mqtt_close()
        print("FanIn terminated")
class KafkaProducer:
    def __init__(self, conf):
        self.producer = Producer(conf)

    def get_topic_list(self, showInternal=True):
        cmd = self.producer.list_topics()
        tmd = cmd.topics
        topic_list = list(tmd.keys())
        if not showInternal:
            # internal topics start with an underscore
            topic_list = [t for t in topic_list if not (t and t[0] == '_')]
        return None, topic_list

    def get_topic_partitions(self, topic_name):
        cmd = self.producer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name, None)
        if not tmd:
            return 'TOPIC_NOT_FOUND', f"{topic_name} not found"
        partitions = [dict(partition=partition.id,
                           leader=partition.leader,
                           replicas=[{'broker': replica_id,
                                      'leader': replica_id == partition.leader,
                                      'in_sync': replica_id in partition.isrs}
                                     for replica_id in partition.replicas])
                      for _, partition in tmd.partitions.items()]
        result = dict(name=tmd.topic, partitions=partitions)
        result['configs'] = {f'k{i}': f'not implemented v{i}' for i in range(5)}
        return None, result

    # @cached(cache=TTLCache(maxsize=1024, ttl=60))
    def get_topic_partition_count(self, topic_name):
        cmd = self.producer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name, None)
        pcount = 0
        if tmd:
            pcount = len(tmd.partitions)
        return pcount

    def send_records(self, topic, records, headers):
        responses = []

        def delivery_report(err, msg):
            """Called once for each message produced to indicate the delivery
            result. Triggered by poll() or flush()."""
            if err is not None:
                LOGGER.info('Message delivery failed: {}'.format(err))
            else:
                LOGGER.info('Message delivered {} {} {} [{}] {}'.format(
                    msg.timestamp(), msg.offset(), msg.topic(),
                    msg.partition(), msg.key()))
            keystr = None if err or not msg.key() else msg.key().decode('UTF-8')
            if not err:
                report = dict(timestamp=msg.timestamp()[1],
                              partition=msg.partition(),
                              offset=msg.offset(),
                              key=keystr)
            else:
                report = dict(error=f"{err}", status="PRODUCER_ERROR")
            responses.append(report)

        partition_count = self.get_topic_partition_count(topic)
        if not partition_count:
            LOGGER.warning(f"Requested topic {topic} does not exist")
            return "TOPIC_NOT_FOUND", dict(
                reason=f"Topic {topic} not found or not accessible to current user")

        LOGGER.info(f"sending records - {records}")
        for record in records:
            data = json.dumps(record["value"])
            key = record.get('key')
            partition = record.get('partition', None)
            if partition:
                try:
                    partition = int(partition)
                except (TypeError, ValueError):
                    partition = 0
            if partition:
                record_partition = partition % partition_count
                self.producer.produce(topic, value=data,
                                      partition=record_partition, key=key,
                                      callback=delivery_report, headers=headers)
            else:
                self.producer.produce(topic, data, key=key,
                                      callback=delivery_report, headers=headers)
            self.producer.poll(.01)

        self.producer.flush()
        LOGGER.info(f"Responses - {responses}")
        return None, responses
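# Usage sketch for send_records; the broker address, topic name, record
# shape, and headers are assumptions:
producer = KafkaProducer({'bootstrap.servers': 'localhost:9092'})
err, result = producer.send_records(
    'example_topic',
    records=[{'value': {'id': 1, 'state': 'ok'}, 'key': 'id-1'}],
    headers={'source': 'demo'})
print(err or result)  # err is None on success; result holds delivery reports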
class KafkaProducerConfluent:
    """
    Producer, based on confluent_kafka.
    """

    """
    Initialization
    """

    def __init__(self, hosts=None, configuration=None, use_tx=False,
                 one_topic_name=None, auto_flush_size=0, flush_is_bad=False):
        """
        :param configuration: https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
        if configuration is None:
            self.configuration = {
                'client.id': default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms': default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }
            if use_tx:
                self.configuration['transactional.id'] = str(uuid4())
        else:
            self.configuration = configuration

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        else:
            if not self.configuration.get('bootstrap.servers'):
                self.configuration['bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        self.topic_part_itr = None
        self.topic_parts = None
        self.one_topic_name = one_topic_name

        if auto_flush_size:
            self.auto_flush = True
        else:
            self.auto_flush = False

        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    """
    Context manager
    """

    def __enter__(self):
        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)

        if self.use_tx:
            try:
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass
            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        On exit.

        :param exc_type:
        :param exc_val:
        :param exc_tb:
        :return:
        """
        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # flush is called under the hood by commit_transaction
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

    """
    Helper operations
    """

    def get_list_topics(self):
        """
        All topics.
        :return:
        """
        try:
            res = self.producer.list_topics().topics
            return res
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        A single topic, looked up by name.
        :param name:
        :return:
        """
        try:
            res = self.producer.list_topics(topic=name).topics
            return res
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the partition settings of all topics.
        :param name_topic: or of one specific topic
        :return:
        """
        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}

        if name_topic is None:
            topics = self.get_list_topics()
        else:
            if self.topic_parts.get(name_topic) is not None:
                self.topic_parts.pop(name_topic)
            topics = self.get_one_topic(name_topic)

        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                continue
            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    def put_data(self, key, value, topic=None, callback=None, partition=None,
                 poll_time=0):
        """
        Queue data for delivery to the message broker.
        To keep things simple, the value is dumped to a JSON string right away.
        Topic name and key must be strings.

        :param key: message key. Leave empty to let Kafka distribute messages itself
        :param value: message value
        :param topic: topic name; if not given, the default topic self.one_topic_name is used
        :param partition: topic partition (a number); if not given, load is balanced across partitions
        :param callback: func(err, msg): if err is not None...
        :return:
        """
        dict_args = self._put_validation_and_transform(
            key=key, value=value, topic=topic, callback=callback,
            partition=partition)
        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self, key, value, topic=None,
                                      callback=None, partition=None):
        """
        Builds the dict of produce() arguments shared by the different
        insertion algorithms.
        """
        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')

        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }

        if key:
            dict_args['key'] = str(key)
        if callback:
            dict_args['callback'] = callback

        if partition:
            # explicit partition
            dict_args['partition'] = partition
        else:
            # rotate evenly across partitions
            top_name = dict_args['topic']
            topic_parts = self.topic_parts.get(top_name)
            if topic_parts:
                current_position = self.topic_part_itr[top_name]
                if key:
                    # a partition is only needed when a key is present
                    dict_args['partition'] = topic_parts[current_position]
                current_position += 1
                if current_position >= len(topic_parts):
                    current_position = 0
                self.topic_part_itr[top_name] = current_position

        return dict_args

    def _put_data_default(self, dict_args, poll_time=0):
        """
        The original insertion approach, with refinements.
        """
        if self.auto_flush:
            # auto-wait for the message buffer to drain, third version
            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)
            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                # second version of the algorithm: flush only when needed
                try:
                    self.producer.produce(**dict_args)
                    self.producer.poll(poll_time)
                except BufferError:
                    # wait for Kafka to work through the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                # first version
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)

    def put_data_direct(self, key, value, topic=None, callback=None,
                        partition=None):
        """
        Direct insertion with data transformation. poll() is not used.
        """
        dict_args = self._put_validation_and_transform(
            key=key, value=value, topic=topic, callback=callback,
            partition=partition)

        if self.auto_flush:
            self.producer.produce(**dict_args)
            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                try:
                    self.producer.produce(**dict_args)
                except BufferError:
                    # wait for Kafka to work through the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                self.producer.produce(**dict_args)
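# Context-manager usage sketch; the host and topic name are assumptions,
# and default_cfg/GeneralConfig/jsd come from the surrounding project:
with KafkaProducerConfluent(hosts='localhost:9092',
                            one_topic_name='example_topic') as producer:
    producer.put_data(key='user-1', value={'action': 'login'})
# on exit, flush() is called, or commit_transaction() when use_tx=True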
class Publisher:
    """
    A class for publishing bluesky documents to a Kafka broker.

    The intention is that Publisher objects be subscribed to a RunEngine.

    Reference: https://github.com/confluentinc/confluent-kafka-python/issues/137

    There is no default configuration. A reasonable production configuration
    for use with bluesky is Kafka's "idempotent" configuration specified by
        producer_config = {
            "enable.idempotence": True
        }
    This is short for
        producer_config = {
            "acks": "all",                              # acknowledge only after all brokers receive a message
            "retries": sys.maxsize,                     # retry indefinitely
            "max.in.flight.requests.per.connection": 5  # maintain message order *when retrying*
        }

    This means three things:
        1) delivery acknowledgement is not sent until all replicate brokers have received a message
        2) message delivery will be retried indefinitely (messages will not be dropped by the Producer)
        3) message order will be maintained during retries

    A reasonable testing configuration is
        producer_config = {
            "acks": 1,
            "request.timeout.ms": 5000,
        }

    Parameters
    ----------
    topic : str
        Topic to which all messages will be published.
    bootstrap_servers : str
        Comma-delimited list of Kafka server addresses as a string
        such as ``'127.0.0.1:9092'``.
    key : str
        Kafka "key" string. Specify a key to maintain message order. If None
        is specified no ordering will be imposed on messages.
    producer_config : dict, optional
        Dictionary configuration information used to construct the underlying
        Kafka Producer.
    on_delivery : function(err, msg), optional
        A function to be called after a message has been delivered or after
        delivery has permanently failed.
    flush_on_stop_doc : bool, optional
        False by default, set to True to flush() the underlying Kafka Producer
        when a stop document is published.
    serializer : function, optional
        Function to serialize data. Default is msgpack.dumps.

    Example
    -------
    Publish documents from a RunEngine to a Kafka broker on localhost port 9092.

    >>> publisher = Publisher(
    >>>     topic="bluesky.documents",
    >>>     bootstrap_servers='localhost:9092',
    >>>     key="abcdef"
    >>> )
    >>> RE = RunEngine({})
    >>> RE.subscribe(publisher)
    """

    def __init__(
        self,
        topic,
        bootstrap_servers,
        key,
        producer_config=None,
        on_delivery=None,
        flush_on_stop_doc=False,
        serializer=msgpack.dumps,
    ):
        self.topic = topic
        self._bootstrap_servers = bootstrap_servers
        self._key = key
        # in the case that "bootstrap.servers" is included in producer_config
        # combine it with the bootstrap_servers argument
        self._producer_config = dict()
        if producer_config is not None:
            self._producer_config.update(producer_config)
        if "bootstrap.servers" in self._producer_config:
            self._producer_config["bootstrap.servers"] = ",".join(
                [bootstrap_servers, self._producer_config["bootstrap.servers"]])
        else:
            self._producer_config["bootstrap.servers"] = bootstrap_servers

        logger.debug("producer configuration: %s", self._producer_config)

        if on_delivery is None:
            self.on_delivery = default_delivery_report
        else:
            self.on_delivery = on_delivery

        self._flush_on_stop_doc = flush_on_stop_doc
        self._producer = Producer(self._producer_config)
        self._serializer = serializer

    def __str__(self):
        return ("bluesky_kafka.Publisher("
                f"topic='{self.topic}', "
                f"key='{self._key}', "
                f"bootstrap_servers='{self._bootstrap_servers}', "
                f"producer_config='{self._producer_config}'"
                ")")

    def get_cluster_metadata(self, timeout=5.0):
        """
        Return information about the Kafka cluster and this Publisher's topic.

        Parameters
        ----------
        timeout : float, optional
            maximum time in seconds to wait before timing out, -1 for
            infinite timeout, default is 5.0s

        Returns
        -------
        cluster_metadata : confluent_kafka.admin.ClusterMetadata
        """
        cluster_metadata = self._producer.list_topics(topic=self.topic,
                                                      timeout=timeout)
        return cluster_metadata

    def __call__(self, name, doc):
        """
        Publish the specified name and document as a Kafka message.

        Flushing the Producer on every stop document guarantees that _at the
        latest_ all documents for a run will be delivered to the broker(s) at
        the end of the run. Without this flush the documents for a short run
        may wait for some time to be delivered. The flush call is blocking so
        it is a bad idea to flush after every document but reasonable to flush
        after a stop document since this is the end of the run.

        Parameters
        ----------
        name : str
            Document name, one of "start", "descriptor", "event", "resource",
            "datum", "stop".
        doc : dict
            event-model document dictionary
        """
        logger.debug(
            "publishing document to Kafka broker(s):"
            "topic: '%s'\n"
            "key: '%s'\n"
            "name: '%s'\n"
            "doc: %s",
            self.topic,
            self._key,
            name,
            doc,
        )
        self._producer.produce(
            topic=self.topic,
            key=self._key,
            value=self._serializer((name, doc)),
            on_delivery=self.on_delivery,
        )
        if self._flush_on_stop_doc and name == "stop":
            self.flush()

    def flush(self):
        """
        Flush all buffered messages to the broker(s).
        """
        logger.debug(
            "flushing Kafka Producer for topic '%s' and key '%s'",
            self.topic,
            self._key,
        )
        self._producer.flush()
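# Besides being subscribed to a RunEngine, a Publisher can be called
# directly; a sketch with placeholder document contents:
publisher = Publisher(topic="bluesky.documents",
                      bootstrap_servers="localhost:9092",
                      key="example-key")
publisher("start", {"uid": "abc123", "time": 0.0})  # placeholder document
publisher.flush()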
class ConfluentKafkaMsgQAPI:
    """
    This class provides APIs to interact with a Kafka queue.
    """

    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer.")
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        if is_producer:
            self.__producer_connect()
        if is_consumer:
            self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: topic={}".format(self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' % err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n'
                % (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        while self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
            except:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))

    def __consumer_connect(self):
        status = False
        try:
            if self.perform_subscription:
                self.__consumer_connect_to_broker()
                self.__subscribe_to_a_topic()
                # self.__iterate_over_kafka_consumer_instance_messages()
            else:
                self.__consumer_connect_to_kafka_broker_and_to_a_topic()
                # self.__consumer_poll_for_new_messages()
            status = True
        except:
            logging_to_console_and_syslog(
                "{}:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(self.thread_identifier,
                                                         sys.exc_info()[0]))
            print("{}:Exception in user code:".format(self.thread_identifier))
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
        return status

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False
        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: instance is None")
            return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(self.topic))
                    self.is_topic_created = True
            except KafkaException:
                self.kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(self.topic))
                ret = self.kafka_admin_client.create_topics(
                    new_topics=[admin.NewTopic(topic=self.topic, num_partitions=1)],
                    operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message; the delivery report callback will
        # be triggered from poll() or flush() below when the message has been
        # successfully delivered or has failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic, value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n'
                             % len(self.producer_instance))
            status = False
        except:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}.".format(filename,
                                                        self.broker_name,
                                                        self.topic)
            logging_to_console_and_syslog(event)

        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered.
        # Serve the delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        # will most likely not serve the delivery callback for the
        # last produce()d message.
        self.producer_instance.poll(timeout=0.1)
        # Wait until all messages have been delivered
        # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
        self.producer_instance.flush(timeout=0.1)
        return status

    def __consumer_connect_to_kafka_broker_and_to_a_topic(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        pass

    def __consumer_poll_for_new_messages(self):
        logging_to_console_and_syslog(
            "{}: Polling the kafka consumer instance for "
            "new messages in the topic {}.".format(self.thread_identifier,
                                                   self.topic))
        # Read messages from Kafka, print to stdout
        try:
            while True:
                msg = self.consumer_instance.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    # Proper message
                    sys.stderr.write('%% %s [%d] at offset %d with key %s:\n'
                                     % (msg.topic(), msg.partition(),
                                        msg.offset(), str(msg.key())))
                    print(msg.value())
        except KeyboardInterrupt:
            sys.stderr.write('%% Aborted by user\n')
        finally:
            # Close down consumer to commit final offsets.
            self.consumer_instance.close()
        """
        msg = self.consumer_instance.poll(timeout=5.0)
        if msg is None:
            return None
        if msg.error():
            raise KafkaException(msg.error())
        else:
            logging_to_console_and_syslog("msg = {}".format(msg))
            logging_to_console_and_syslog('Consumer:{}: Rcvd msg %% %s [%d] at offset %d with key %s: value : %s\n'
                                          .format(self.thread_identifier,
                                                  msg.topic(),
                                                  msg.partition(),
                                                  msg.offset(),
                                                  str(msg.key()),
                                                  str(msg.value())))
            return msg.value()
        """
        return None

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        if self.consumer_instance:
            return
        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
        self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer{}'.format(self.thread_identifier),
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        consumer_conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': 'group',
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        consumer_conf['stats_cb'] = stats_cb
        consumer_conf['statistics.interval.ms'] = 0

        # Create a logger for the consumer (logs will be emitted when poll() is called)
        logger = logging.getLogger('consumer')
        logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
        logger.addHandler(handler)

        while self.consumer_instance is None:
            try:
                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                self.consumer_instance = Consumer(consumer_conf, logger=logger)
            except:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog(
            "Consumer:{}:Consumer Successfully "
            "connected to broker_name={}".format(self.thread_identifier,
                                                 self.broker_name))

    @staticmethod
    def print_assignment(consumer, partitions):
        logging_to_console_and_syslog('consumer = {}, Assignment {}:',
                                      repr(consumer), partitions)

    def __subscribe_to_a_topic(self):
        try:
            # Subscribe to topics
            cluster_meta_data = self.consumer_instance.list_topics(self.topic,
                                                                   timeout=0.3)
            logging_to_console_and_syslog("ClusterMetaData={}".format(
                repr(cluster_meta_data)))
            if self.topic not in cluster_meta_data.topics.keys():
                logging_to_console_and_syslog(
                    "Topic {} is not found in the ClusterMetaData {}".format(
                        self.topic, repr(cluster_meta_data.topics.keys())))
                raise KafkaException

            def print_assignment(consumer, partitions):
                print('Assignment:', partitions)

            # subscribe() expects a list of topic names
            self.consumer_instance.subscribe([self.topic],
                                             on_assign=print_assignment)
            """
            self.consumer_instance.subscribe(self.topic,
                                             on_assign=ConfluentKafkaMsgQAPI.print_assignment)
            """
        except:
            logging_to_console_and_syslog(
                "Consumer:{}: Unable to subscribe to topic {}.".format(
                    self.thread_identifier, self.topic))
        return True

    def __iterate_over_kafka_consumer_instance_messages(self):
        """
        logging_to_console_and_syslog("Consumer:{}: dequeue {}."
                                      .format(self.thread_identifier, self.topic))
        """
        pass

    def dequeue(self):
        try:
            if self.perform_subscription:
                # logging_to_console_and_syslog("{}:Perform __consumer_poll_for_new_messages."
                #                               .format(self.thread_identifier))
                return self.__consumer_poll_for_new_messages()
            else:
                # logging_to_console_and_syslog("{}:Perform __iterate_over_kafka_consumer_instance_messages."
                #                               .format(self.thread_identifier))
                return self.__iterate_over_kafka_consumer_instance_messages()
        except:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(sys.exc_info()[0]))
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception in user code:")
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)
        return None

    def cleanup(self):
        pass
from confluent_kafka import Producer
import random
import time

conf = {
    'bootstrap.servers': '131.247.3.206:9092',
    'client.id': 'producerAdil1',
}

producer = Producer(conf)
print(producer.list_topics())

x = random.randint(1, 1000)


def on_callback(err, msg):
    if err:
        print(err)
    else:
        print(msg)


ts = time.time()
producer.produce('transactions', key='1', value=f'{ts}: ${x}',
                 on_delivery=on_callback)
producer.flush()
print(x)
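# list_topics() returns a ClusterMetadata object whose repr is terse; to
# print just the topic names, one could iterate it:
metadata = producer.list_topics(timeout=5)
for name in sorted(metadata.topics):
    print(name)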
import json
import time

from confluent_kafka import Producer
from faker import Faker

# Producer and Faker instances used below; the broker address is an
# assumption, as both were defined outside this excerpt.
p = Producer({'bootstrap.servers': 'localhost:9092'})
fake = Faker()


def receipt(err, msg):
    """Delivery acknowledgment callback for the producer and consumer."""
    if err is not None:
        print("Error: {0}".format(err))
    else:
        print("{0}: Message on topic {1} on partition {2} with a value"
              " of {3}".format(
                  time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(msg.timestamp()[1] / 1000)),
                  msg.topic(), msg.partition(),
                  msg.value().decode('utf-8')))


print("Topics available to publish: {0}".format(', '.join(
    p.list_topics().topics)))

for _ in range(10):
    data = {
        'name': fake.name(),
        'age': fake.random_int(min=18, max=101, step=1),
        'street': fake.street_address(),
        'city': fake.city(),
        'state': fake.state(),
        'zip': fake.zipcode(),
        'lng': float(fake.longitude()),
        'lat': float(fake.latitude())
    }
    m = json.dumps(data)
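# The excerpt ends right after serializing the record; presumably the loop
# continues by producing each message and flushing at the end, along these
# lines (the topic name 'users' is hypothetical):
    p.produce('users', m.encode('utf-8'), callback=receipt)
    p.poll(0)  # serve delivery callbacks so receipt() gets invoked

p.flush()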
class ConfluentKafkaMsgQAPI:
    """
    This class provides APIs to interact with a Kafka queue.
    """

    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer.")
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        # if is_producer:
        #     self.__producer_connect()
        # if is_consumer:
        #     self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: topic={}".format(self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' % err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n'
                % (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        is_connected = False
        if self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
                is_connected = True
            except:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))
        return is_connected

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False
        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one..")
            if not self.__producer_connect():
                logging_to_console_and_syslog("Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(self.topic))
                    self.is_topic_created = True
            except KafkaException:
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(self.topic))
                ret = kafka_admin_client.create_topics(
                    new_topics=[admin.NewTopic(topic=self.topic, num_partitions=1)],
                    operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message; the delivery report callback will
        # be triggered from poll() or flush() below when the message has been
        # successfully delivered or has failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic, value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n'
                             % len(self.producer_instance))
            status = False
        except:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}.".format(filename,
                                                        self.broker_name,
                                                        self.topic)
            logging_to_console_and_syslog(event)

        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered.
        # Serve the delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        # will most likely not serve the delivery callback for the
        # last produce()d message.
        self.producer_instance.poll(timeout=0.1)
        # Wait until all messages have been delivered
        # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
        self.producer_instance.flush(timeout=0.1)
        return status

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        is_connected = False
        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
        self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer',
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        if self.consumer_instance is None:
            try:
                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                consumer_conf = {
                    'bootstrap.servers': self.broker_name,
                    'group.id': self.topic,
                    'session.timeout.ms': 6000,
                    'auto.offset.reset': 'earliest'
                }
                # consumer_conf['stats_cb'] = stats_cb
                # consumer_conf['statistics.interval.ms'] = 0
                self.consumer_instance = Consumer(consumer_conf)
                is_connected = True
            except:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)
        logging_to_console_and_syslog(
            "Consumer:{}:Consumer Successfully "
            "connected to broker_name={}".format(self.thread_identifier,
                                                 self.broker_name))
        return is_connected

    @staticmethod
    def print_assignment(consumer, partitions):
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': self.topic,
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        if not self.consumer_instance:
            self.consumer_instance = Consumer(conf)
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)
        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None
        else:
            logging_to_console_and_syslog(
                '%% %s [%d] at offset %d with key %s:\n'
                % (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            msg = msg.value().decode('utf8')
            logging_to_console_and_syslog("msg.value()={}".format(msg))
            # Closing the consumer commits final offsets but forces a
            # re-subscribe on the next dequeue() call.
            self.consumer_instance.close()
            self.consumer_instance = None
            return msg

    def cleanup(self):
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None
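# Usage sketch, assuming the environment variables the class polls for are
# set before construction (values below are assumptions):
import os

os.environ['broker_name_key'] = 'localhost:9092'
os.environ['topic_key'] = 'example_topic'

queue = ConfluentKafkaMsgQAPI(is_producer=True, is_consumer=True,
                              perform_subscription=True,
                              thread_identifier='worker-1')
queue.enqueue('/tmp/example-file.json')
print(queue.dequeue())
queue.cleanup()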