def test_basic_api():
    """Exercise the basic Producer API without a broker.

    No broker is configured, so nothing is actually delivered: the test only
    checks that the calls are accepted and that produced messages time out.
    """
    # Constructing a Producer without a configuration must be rejected.
    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    # Very short timeouts so the undeliverable messages fail quickly.
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        # Report the actual delivery result (the previous code printed the
        # builtin ``str`` type by mistake).
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    # Metadata requests cannot succeed either; only these two error codes
    # are acceptable when the call raises.
    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
def test_fatal():
    """Verify that a simulated fatal error is raised as a fatal KafkaError."""
    # Only an error callback is configured; no broker is involved.
    producer = Producer({'error_cb': error_cb})

    with pytest.raises(KafkaException) as raised:
        KafkaError._test_raise_fatal()

    # The KafkaException carries the underlying KafkaError as its first arg.
    kafka_error = raised.value.args[0]
    assert isinstance(kafka_error, KafkaError)
    assert kafka_error.fatal() is True

    # Touch the producer so flake8 does not report it as unused.
    producer.poll(0)
def test_error_cb():
    """Check that the error callback fires when no broker is reachable.

    The producer is pointed at an invalid broker address; the module-level
    flag ``seen_all_brokers_down`` is expected to become true within five
    seconds of polling.
    """
    global seen_all_brokers_down

    config = {
        'bootstrap.servers': '127.0.0.1:1',
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
    }
    p = Producer(config)

    deadline = time.time() + 5
    while True:
        # Stop as soon as the flag is set or the time budget is used up.
        if seen_all_brokers_down or time.time() >= deadline:
            break
        p.poll(1)

    assert seen_all_brokers_down
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """Capture packets from a network interface and send them to a Kafka topic.

    :param args: parsed options; this function reads ``kafka_configs``,
        ``interface``, ``snaplen``, ``max_packets`` (<= 0 means unlimited),
        ``kafka_topic`` and ``pretty_print`` (print a progress line every
        N packets, disabled when <= 0).
    :param sniff_timeout_ms: read timeout handed to ``pcapy.open_live``.
    :param sniff_promisc: promiscuous-mode flag handed to ``pcapy.open_live``.
    """
    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    global producer_args
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # capture a packet; the header is None when nothing was read
            (pkt_hdr, pkt_raw) = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())

                # produce the packet, keyed by its packed capture timestamp
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(args.kafka_topic, key=pack_ts(pkt_ts), value=pkt_raw,
                                       callback=delivery_callback)

                # pretty print, if needed
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    # Parenthesised single-argument form: valid on Python 2 and 3.
                    print('Packet received[%s]' % (pkts_in))

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out may not be initialized if the callback was never executed
        pkts_out = getattr(delivery_callback, "pkts_out", 0)
        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
{ "id": "%s", "type":"alarm", "name": "Tap Unreachable Alarm", "source": "%s", "reason":"Heartbeat failed. TAP or Network is down", "time":"%s", "time-zone":"GMT+5:30", "severity":"%s", "suppress": false, "message": "TAP with IP 10.1.23.4 and with id: %s is not reachable" } """ while True: source = choice(SOURCES) t = datetime.now().isoformat() global SEQ yield (t, template % (SEQ, source, t, choice(SEVERITY), source)) SEQ += 1 sleep(interval) if __name__ == '__main__': for key, data in datagen(): p.poll(0) p.produce(topic, data.encode('utf-8'), callback=delivery_report, key=key)
def simulate_cmd(cmd_topic):
    """Publish one canned query command to the Kafka topic ``cmd_topic``.

    Recognised topics (any other value produces nothing, but the producer is
    still polled and flushed):

    * ``HistoryAlarmCmd`` - historical alarm query
    * ``History300CRAlarmCmd`` - 300CR historical alarm query
    * ``HistoryDigitalChangeCmd`` - digital (switch) value change query
    * ``HistoryCmd`` - historical data query
    * ``TestCmd`` - test information query

    :param cmd_topic: topic to publish to; it also selects the payload.
    :return: None
    """
    # Start/end timestamp in milliseconds; the original author annotated
    # this value as 2018/9/23 10:00:00.
    query_time = 1537668000000

    # One JSON payload per command topic. "SourceName" may also be "All" for
    # the three alarm/change queries, and "Time" may hold several ranges for
    # HistoryCmd (variants noted by the original author).
    payloads = {
        "HistoryAlarmCmd": {"SourceName": "300MT",
                            "StartTime": query_time,
                            "EndTime": query_time},
        "History300CRAlarmCmd": {"SourceName": "300MT",
                                 "StartTime": query_time,
                                 "EndTime": query_time},
        "HistoryDigitalChangeCmd": {"SourceName": "905XR",
                                    "StartTime": query_time,
                                    "EndTime": query_time},
        # single time range
        "HistoryCmd": {"SourceName": ["905XR", "906XR", "907XR"],
                       "Time": [{"StartTime": query_time, "EndTime": query_time}]},
        "TestCmd": {"TestCmd": "TestInfo"},
    }

    # Configure and create the Producer instance.
    conf = {'bootstrap.servers': settings.BROKER}
    p = Producer(**conf)

    cmd = payloads.get(cmd_topic)
    if cmd is not None:
        p.produce(cmd_topic, json.dumps(cmd))  # send to kafka

    p.poll(0)
    p.flush()
"""Delivery report handler called on successful or failed delivery of message """ if err is not None: print("Failed to deliver message: {}".format(err)) else: delivered_records += 1 print("Produced record to topic {} partition [{}] @ offset {}". format(msg.topic(), msg.partition(), msg.offset())) with open(os.path.join(DATA_DIR, 'bcsample.json')) as data_file: data = json.load(data_file) for n in range(1000): record_key = "key-{}".format(n) record_value = json.dumps(data[n]) print("Producing record: {}\t{}".format(record_key, record_value)) producer.produce(topic, key=record_key, value=record_value, on_delivery=acked) time.sleep(0.25) # p.poll() serves delivery reports (on_delivery) # from previous produce() calls. producer.poll(0) producer.flush() print("{} messages were produced to topic {}!".format( delivered_records, topic))
print("Total data:")
print(len(data))
print("---------------------------------")

# Every record is published under the same fixed key.
record_key = 'stop'
for count, record in enumerate(data, start=1):
    # Progress indicator: print the running count every 10 records.
    if count % 10 == 0:
        print(count)
    record_value = json.dumps(record)
    logging.info(
        "Producing record: {}\t{}".format(
            record_key,
            record_value))
    try:
        producer.produce(topic,
                         key=record_key,
                         value=record_value,
                         on_delivery=acked)
    except BufferError:
        # The local queue is full: give delivery reports up to a second to
        # drain, then retry once.
        producer.poll(1)
        producer.produce(topic,
                         key=record_key,
                         value=record_value,
                         on_delivery=acked)
    # poll(0) serves delivery reports (on_delivery) from previous produce()
    # calls without blocking; a bare poll() would wait for an event on every
    # iteration and make the loop effectively synchronous.
    producer.poll(0)

# Block until every outstanding message is delivered or has failed.
producer.flush()

print(
    "{} messages were produced to topic {}!".format(
        delivered_records,
        topic))
def producer_function():
    """Run the interactive chat prompt and publish what the user types.

    Reads lines from stdin until the user enters ``exit``. ``sys.argv[1]`` is
    the Kafka bootstrap server and ``sys.argv[2]`` is this client's name,
    which is appended to outgoing messages. The module-level ``topic`` is the
    current destination and is changed by some commands; ``contact_list``
    collects the topics the user connected to.

    Commands:
      * ``exit`` - leave the loop.
      * ``logout`` - tell the current topic that this client logs out.
      * anything containing ``read`` - print ``<client>.txt`` chat history.
      * anything containing ``list existing topics`` - ask the server for the
        topics, then read the topic to connect to.
      * ``connect to server`` - switch the destination to ``server``.
      * ``contact list`` - print the known contacts.
      * anything else - sent as a chat message to the current topic.
    """
    global topic

    p = Producer({'bootstrap.servers': sys.argv[1]})
    my_input = None
    # keep producing until the client asks to exit
    while my_input != 'exit':
        print()
        print("**Enter your Message for {}**".format(topic))
        my_input = input()
        if my_input == 'exit':
            # Serve pending delivery callbacks; the loop condition ends the
            # session (the former bare ``exit`` expression did nothing).
            p.poll(0)
        # inform the server about the logging out activity
        elif my_input == "logout":
            print("Attempting to Logout..")
            my_input = my_input + "-" + sys.argv[2]
            p.produce(topic, my_input.encode('utf-8'))
            time.sleep(5)
            print("Logged Out!")
        # retrieve the chat history of the client with another client
        elif 'read' in my_input:
            print("Enter the client whose messages you want to read: ")
            client = input()
            print("Retrieving Messages..")
            time.sleep(5)
            client += ".txt"
            try:
                # ``with`` closes the file only if it was actually opened;
                # the old ``finally: file.close()`` raised when open() failed.
                with open(client, "r") as history:
                    print(history.read())
            except FileNotFoundError:
                print("OOPS! You haven't done any chat to the requested Client.")
        # ask the server for active topics and connect to either of them or server itself
        elif 'list existing topics' in my_input:
            my_input = my_input + " " + sys.argv[2]
            p.produce('server', my_input.encode('utf-8'))
            print("Please Wait... Processing your Request!")
            print("Existing Topics: ")
            time.sleep(5)
            # NOTE: entering "exit" as the topic name here also ends the loop.
            my_input = input()
            topic = my_input
            print("You chose to connect to: {}".format(topic))
            contact_list.add(my_input)
        # connect to the server from any stage
        elif my_input == 'connect to server':
            print("Attempting to Connect..")
            time.sleep(5)
            topic = 'server'
            print("Connection Successful!")
        # retrieve the list of contacts
        elif my_input == 'contact list':
            # NOTE(review): the global topic is switched to this client's own
            # name while printing and then restored - presumably for another
            # reader of ``topic``; confirm whether this is still needed.
            temp_topic = topic
            topic = sys.argv[2]
            for contact in contact_list:
                print(contact)
            topic = temp_topic
        # else send a message to the client/server you want to chat to
        else:
            my_input = my_input + "-" + sys.argv[2]
            p.produce(topic, my_input.encode('utf-8'), callback=delivery_report)

    # Give queued messages a bounded chance to be delivered before returning.
    p.flush(5)
def main():
    """Consume raw collectd data from Kafka and fan it out to Kafka and InfluxDB.

    Each consumed message is passed to ``extract``; every resulting item is
    used as ``(name, value, timestamp_ms)`` and is

    * re-published to the ``collectd`` Kafka topic, and
    * buffered as an InfluxDB point, with the buffer written whenever the
      background flag thread has signalled (every 0.5 s).

    Runs until interrupted with Ctrl-C. The producer is flushed, the consumer
    closed and the flag thread stopped on every exit path, so an unexpected
    exception no longer leaves the helper thread keeping the process alive.
    """
    # kafka
    consumer = Consumer(collectd_cfg['consumer'])
    consumer.subscribe([collectd_cfg['raw_data_topic']])
    producer = Producer(collectd_cfg['producer'])
    # Trigger any available delivery report callbacks from previous produce() calls
    # see: https://github.com/confluentinc/confluent-kafka-python/issues/16
    producer.poll(0)

    # influxdb
    influxdb_client = InfluxDBClient(host=influxdb_cfg['server'],
                                     database=influxdb_cfg['database'])
    influxdb_client.create_database(influxdb_cfg['database'])
    influxdb_client.create_retention_policy(name="infinite",
                                            duration='INF',
                                            replication=1,
                                            database=influxdb_cfg['database'],
                                            default=True)
    influxdb_data_points = []

    # Both events start cleared.
    influxdb_to_send = threading.Event()
    influxdb_to_stop = threading.Event()

    def set_influxdb_to_send():
        # Raise the "send" flag every 0.5 s until asked to stop; wait()
        # returns True as soon as the stop event is set.
        while not influxdb_to_stop.wait(0.5):
            influxdb_to_send.set()

    influxdb_flag_thread = threading.Thread(target=set_influxdb_to_send)
    influxdb_flag_thread.start()

    logger.info("Start processing collectd data ...")
    try:
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            if msg.error():
                logger.error("Consumer error: {}".format(msg.error()))
                continue

            measurements = extract(msg)

            # Send extracted data to kafka topics.
            # Asynchronously produce a message; the delivery report callback
            # is triggered from poll() here, or flush() at shutdown, once the
            # message has been delivered or has failed permanently.
            for item in measurements:
                producer.produce(topic='collectd',
                                 value=str({item[0]: item[1]}),
                                 timestamp=item[2],
                                 callback=delivery_report)
                producer.poll(0)

            # Send extracted data to influxdb, batched until the flag is set
            for item in measurements:
                influxdb_data_points.append({
                    "measurement": item[0],
                    # timestamp from ms in collectd to ns in influxdb
                    "time": int(item[2]) * 10**6,
                    "fields": {
                        "value": item[1],
                    }
                })
            if influxdb_to_send.is_set():
                influxdb_client.write_points(influxdb_data_points)
                influxdb_to_send.clear()
                influxdb_data_points = []
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; cleanup happens below.
        logger.info("Interrupted, shutting down ...")
    finally:
        # Stop the helper thread first so the process can always exit.
        influxdb_to_stop.set()
        influxdb_flag_thread.join()
        # Wait for any outstanding messages to be delivered and delivery
        # report callbacks to be triggered.
        producer.flush()
        consumer.close()
def test_cluster(bootstrap_ip, bootstrap_port):
    """Send one probe message to the cluster at ``bootstrap_ip:bootstrap_port``.

    The ``test_ok`` callback receives the delivery report; ``poll`` is given
    up to five seconds to serve it.
    """
    servers = f'{bootstrap_ip}:{bootstrap_port}'
    probe = Producer({'bootstrap.servers': servers})
    probe.produce('cluster-test', 'Cluster-Test', callback=test_ok)
    probe.poll(5)
class KafkaSink(SandcrawlerWorker):
    """Worker that publishes every pushed record to a single Kafka topic.

    Delivery failures are treated as fatal: the delivery callback raises
    ``KafkaException`` so the process bails out instead of silently dropping
    records.
    """

    def __init__(self, kafka_hosts: str, produce_topic: str, **kwargs):
        """Create the underlying producer.

        :param kafka_hosts: value for ``bootstrap.servers``.
        :param produce_topic: topic every record is produced to.
        :param kwargs: accepted for interface compatibility; not used here.
        """
        self.sink = None
        self.counts = Counter()
        self.produce_topic = produce_topic
        self.kafka_hosts = kafka_hosts

        config = self.producer_config({
            "bootstrap.servers": kafka_hosts,
            "message.max.bytes": 30000000,  # ~30 MBytes; broker is ~50 MBytes
            "api.version.request": True,
            "api.version.fallback.ms": 0,
        })
        self.producer = Producer(config)

    @staticmethod
    def _fail_fast(err: Any, msg: Any) -> None:
        """Delivery callback: raise on any delivery error."""
        if err is not None:
            print("Kafka producer delivery error: {}".format(err), file=sys.stderr)
            print("Bailing out...", file=sys.stderr)
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)

    def producer_config(self, kafka_config: dict) -> dict:
        """Return a copy of ``kafka_config`` with the sink's delivery settings.

        The input dict is not modified. The added keys override any same-named
        keys in ``kafka_config``.
        """
        config = kafka_config.copy()
        config.update({
            "delivery.report.only.error": True,
            "default.topic.config": {
                "message.timeout.ms": 30000,
                "request.required.acks": -1,  # all brokers must confirm
            },
        })
        return config

    def push_record(self, msg: Any, key: Optional[str] = None) -> Any:
        """Produce one record.

        :param msg: a dict (JSON-encoded before sending), a str (UTF-8
            encoded) or bytes. Subclasses of these types are accepted too.
        :param key: message key; when falsy and ``msg`` is a dict containing
            ``"key"``, that value is used.
        :raises TypeError: if ``msg`` is none of the supported types.
        """
        self.counts["total"] += 1
        if isinstance(msg, dict):
            if not key and "key" in msg:
                key = msg["key"]
            msg = json.dumps(msg)
        if isinstance(msg, str):
            msg = msg.encode("utf-8")
        # Explicit check instead of ``assert`` so it also runs under ``-O``.
        if not isinstance(msg, bytes):
            raise TypeError(
                "msg must be dict, str or bytes, got {}".format(type(msg).__name__)
            )
        self.producer.produce(self.produce_topic, msg, key=key, on_delivery=self._fail_fast)
        self.counts["produced"] += 1

        # check for errors etc
        self.producer.poll(0)

    def push_batch(self, msgs: List[Any]) -> List[Any]:
        """Produce every record in ``msgs``; always returns an empty list."""
        for m in msgs:
            self.push_record(m)
        return []

    def finish(self) -> Counter:
        """Flush outstanding messages and return the counters."""
        self.producer.flush()
        return self.counts
class CompetitionProducer:
    """Kafka producer that replays a competition's data as a timed stream.

    ``main`` sends an initial batch, waits for the initial training time and
    then alternates between releasing test items and the matching training
    items, sleeping between the phases.
    """
    daemon = True
    producer = None

    def __init__(self, server):
        """Create the Kafka producer for bootstrap server ``server``."""
        conf = {'bootstrap.servers': server}
        self.producer = Producer(conf)  # Create producer

    # message must be in byte format
    def send(self, topic, message):
        """Produce ``message`` to ``topic`` and serve pending delivery events."""
        self.producer.produce(topic, message)  # Sending messages to a certain topic
        self.producer.poll(timeout=0)

    def main(self, topic, initial_batch, items, predictions, initial_training_time, batch_size, time_interval,
             predictions_time_interval, spark_topic, competition_id):
        """
        Recreates the stream.

        Sends the data in batches: first test (without the target value) and then train batches.
        All batches are sent according to the time intervals set for the current competition.

        :param topic: topic receiving the initial batch and the TEST/TRAIN items
        :param initial_batch: items sent once, before any waiting
        :param items: test items; split into groups of ``batch_size``
        :param predictions: training items; split into groups of ``batch_size``
            and paired with the test groups by position
        :param initial_training_time: seconds to sleep after the initial batch
            (converted with ``int``)
        :param batch_size: number of items per group
        :param time_interval: seconds slept between the phases of each group
        :param predictions_time_interval: seconds added to the release time to
            form the deadline (converted with ``int``)
        :param spark_topic: topic receiving the training items before the wait
        :param competition_id: stored on every released item
        :return: None
        """

        for item in initial_batch:
            try:
                # Send row by row from initial batch as json
                self.send(topic, orjson.dumps(item))
            except Exception as e:
                # Any send failure is only printed; the item is not retried.
                print(e)

        # After sending initial batch, sleep for initial training time
        time.sleep(int(initial_training_time))

        # Creating lists of batch size, one for test items with just values and second with predictions for training
        test_groups = list(self.chunker(items, batch_size))
        train_groups = list(self.chunker(predictions, batch_size))

        # Index of the train group that belongs to the current test group.
        i = -1

        # Accessing each group in the list test_groups
        for group in test_groups:
            # In parallel accessing the predictions
            # Adding tag, deadline and released at to every item in train group / prediction
            released_at = datetime.datetime.now()

            # for item in test group add tag, deadline and released
            for item in group:
                item['tag'] = 'TEST'
                item['Deadline'] = str(released_at + datetime.timedelta(seconds=int(predictions_time_interval)))
                item['Released'] = str(released_at)
                item['competition_id'] = str(competition_id)
                # Sending testing items
                try:
                    self.send(topic, orjson.dumps(item))
                except Exception as e:
                    print(e)

            i = i + 1
            # NOTE(review): raises IndexError when there are fewer train
            # groups than test groups - confirm both inputs always have the
            # same length.
            train_group = train_groups[i]

            # First pass over the train group: string timestamps, sent to the
            # spark topic.
            for item in train_group:
                deadline = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Deadline'] = deadline.strftime("%Y-%m-%d %H:%M:%S")
                item['Released'] = released_at.strftime("%Y-%m-%d %H:%M:%S")
                item['competition_id'] = competition_id
                try:
                    self.send(spark_topic, orjson.dumps(item))
                except Exception as e:
                    print(e)

            time.sleep(time_interval)

            # Second pass: the same items are tagged TRAIN, their timestamps
            # are replaced by datetime objects, and they go to the main topic.
            for item in train_group:
                item['tag'] = 'TRAIN'
                item['Deadline'] = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Released'] = released_at
                try:
                    self.send(topic, orjson.dumps(item, default=json_util.default))
                except Exception as e:
                    print(e)

            time.sleep(time_interval)

        # Wait until every queued message has been delivered or has failed.
        self.producer.flush()

    @staticmethod
    def chunker(seq, size):
        """
        Returns data in chunks (batches) of a given size.

        Yields consecutive slices ``seq[pos:pos + size]``; the last one may be
        shorter.
        """
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    @staticmethod
    def is_not_empty(row):
        """Return True when every item in ``row`` equals the empty string.

        NOTE(review): the name suggests the opposite result (True for a row
        that has content) - confirm against the callers before relying on it.
        """
        return all(item == "" for item in row)

    def create_competition(self, competition, items, predictions, initial_batch):
        """Create a competition and start releasing the data stream.

        The topic name is the competition name lower-cased with spaces
        removed; the spark topic is that name plus ``'spark_train'``. The
        timing and batch settings are read from ``competition``.
        """
        self.main(
            topic=competition.name.lower().replace(" ", ""),
            initial_training_time=competition.initial_training_time,
            initial_batch=initial_batch,
            items=items,
            predictions=predictions,
            batch_size=competition.batch_size,
            time_interval=competition.time_interval,
            predictions_time_interval=competition.predictions_time_interval,
            spark_topic=competition.name.lower().replace(" ", "") + 'spark_train',
            competition_id=competition.competition_id)
class KafkaStreamingClient(AbstractStreamingClient):
    """Kafka streaming client.

    Acts as a producer when no ``KAFKA_CONSUMER_GROUP`` is configured and as
    a consumer otherwise; only the matching attribute (``self.producer`` or
    ``self.consumer``) is created.
    """

    def __init__(self, config):  # pragma: no cover
        """
        Streaming client implementation based on Kafka.

        Configuration keys:
          KAFKA_ADDRESS
          KAFKA_CONSUMER_GROUP
          KAFKA_TOPIC
          TIMEOUT
          EVENTHUB_KAFKA_CONNECTION_STRING

        ``TIMEOUT`` is interpreted as whole seconds; a missing or
        non-numeric value disables the timeout.
        """
        self.logger = Logger()

        self.topic = config.get("KAFKA_TOPIC")
        if config.get("TIMEOUT"):
            try:
                self.timeout = int(config.get("TIMEOUT"))
            except ValueError:
                self.timeout = None
        else:
            self.timeout = None

        kafka_config = self.create_kafka_config(config)
        self.admin = admin.AdminClient(kafka_config)

        if config.get("KAFKA_CONSUMER_GROUP") is None:
            self.logger.info('Creating Producer')
            self.producer = Producer(kafka_config)
        else:
            self.logger.info('Creating Consumer')
            self.consumer = Consumer(kafka_config)

    @staticmethod
    def create_kafka_config(user_config: dict) -> dict:  # pragma: no cover
        """Create the kafka configuration.

        Starts from fixed defaults, adds the SASL/SSL settings when
        ``EVENTHUB_KAFKA_CONNECTION_STRING`` is present, and copies
        ``KAFKA_CONSUMER_GROUP`` and ``KAFKA_DEBUG`` when they are set.
        """
        config = {
            "bootstrap.servers": user_config.get("KAFKA_ADDRESS"),
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "default.topic.config": {
                'auto.offset.reset': 'smallest'
            },
        }

        if user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'):
            ssl_location = user_config.get(
                'SSL_CERT_LOCATION') or '/etc/ssl/certs/ca-certificates.crt'
            eventhub_config = {
                'security.protocol': "SASL_SSL",
                'sasl.mechanism': "PLAIN",
                'ssl.ca.location': ssl_location,
                'sasl.username': '******',
                'sasl.password': user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'),
                'client.id': 'agogosml',
            }
            config = {**config, **eventhub_config}

        if user_config.get('KAFKA_CONSUMER_GROUP') is not None:
            config['group.id'] = user_config['KAFKA_CONSUMER_GROUP']

        if user_config.get('KAFKA_DEBUG') is not None:
            config['debug'] = user_config['KAFKA_DEBUG']

        return config

    def delivery_report(self, err, msg):  # pragma: no cover
        """
        Indicate delivery result.

        Called once for each message produced. Triggered by poll() or flush().

        :param err: An error message.
        :param msg: A string input to be uploaded to kafka.
        """
        if err is not None:
            self.logger.error('Message delivery failed: %s', err)
        else:
            self.logger.info('Message delivered to %s [%s]',
                             msg.topic(), msg.partition())

    def send(self, message: str):  # pragma: no cover
        """Send ``message`` to the configured topic and wait for delivery.

        :param message: text to send; encoded as UTF-8.
        :return: True when produce and flush completed, False when sending
            raised an exception (the exception is logged).
        :raises TypeError: if ``message`` is not a str.
        """
        if not isinstance(message, str):
            raise TypeError('str type expected for message')
        try:
            mutated_message = message.encode('utf-8')
            self.logger.info('Sending message to kafka topic: %s', self.topic)
            self.producer.poll(0)
            self.producer.produce(self.topic, mutated_message,
                                  callback=self.delivery_report)
            self.producer.flush()
            return True
        except Exception as ex:
            self.logger.error('Error sending message to kafka: %s', ex)
            return False

    def stop(self):  # pragma: no cover
        """Do nothing; present to satisfy the client interface."""
        pass

    def check_timeout(self, start: datetime):  # pragma: no cover
        """Interrupts if too much time has elapsed since the kafka client started running.

        :param start: time the client started receiving.
        :raises KeyboardInterrupt: when the configured timeout has elapsed.
        """
        if self.timeout is not None:
            elapsed = datetime.now() - start
            # total_seconds() covers the whole duration; ``.seconds`` is only
            # the sub-day component and wraps back to 0 every 24 hours.
            if elapsed.total_seconds() >= self.timeout:
                raise KeyboardInterrupt

    def handle_kafka_error(self, msg):  # pragma: no cover
        """Handle an error in kafka.

        End-of-partition events are logged; any other error is raised as a
        ``KafkaException``.
        """
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            self.logger.info('%% %s [%d] reached end at offset %d\n',
                             msg.topic(), msg.partition(), msg.offset())
        else:
            # Error
            raise KafkaException(msg.error())

    def start_receiving(self, on_message_received_callback):  # pragma: no cover
        """Subscribe and pass every received message value to the callback.

        Runs until the timeout elapses or a KeyboardInterrupt arrives; the
        consumer is closed in all cases.
        """
        try:
            self.subscribe_to_topic()
            start = datetime.now()

            while True:
                # Stop loop after timeout if exists
                self.check_timeout(start)

                # Poll messages from topic
                msg = self.read_single_message()
                if msg is not None:
                    on_message_received_callback(msg)

        except KeyboardInterrupt:
            self.logger.info('Aborting listener...')

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def subscribe_to_topic(self):  # pragma: no cover
        """Subscribe to topic."""
        self.consumer.subscribe([self.topic])

    def read_single_message(self):  # pragma: no cover
        """Poll messages from topic.

        :return: the message value, or None when nothing was received or the
            message carried a (non-fatal) error event.
        """
        msg = self.consumer.poll(0.000001)
        if msg is None:
            return None
        if msg.error():
            # Error or event
            self.handle_kafka_error(msg)
            return None
        # Proper message: commit it before handing the value to the caller.
        self.consumer.commit(msg)
        return msg.value()
def test_ingester(self):
    """End-to-end test of the ZTF alert ingester.

    Steps, in order:
      1. initialise the database and (if the broker mode is enabled) create
         the test programs and filters;
      2. start ZooKeeper and a Kafka server from the configured Kafka path;
      3. (re)create today's test topic and push every ``*.avro`` alert found
         in the local alert directory to it;
      4. run the ingester (``watchdog``) in test mode;
      5. shut Kafka and ZooKeeper down again;
      6. assert the resulting document counts in MongoDB and, in broker mode,
         the candidates/sources reported by the API.
    """

    init_db_sync(config=config, verbose=True)

    log("Setting up paths")
    path_logs = pathlib.Path(config["path"]["logs"])
    if not path_logs.exists():
        path_logs.mkdir(parents=True, exist_ok=True)

    if config["misc"]["broker"]:
        log("Setting up test groups and filters in Fritz")
        # First program: filter created without an explicit pipeline.
        program = Program(group_name="FRITZ_TEST", group_nickname="test")
        Filter(
            collection="ZTF_alerts",
            group_id=program.group_id,
            filter_id=program.filter_id,
        )

        # Second program: auto-saves the single matching object.
        program2 = Program(group_name="FRITZ_TEST_AUTOSAVE", group_nickname="test2")
        Filter(
            collection="ZTF_alerts",
            group_id=program2.group_id,
            filter_id=program2.filter_id,
            autosave=True,
            pipeline=[{"$match": {"objectId": "ZTF20aaelulu"}}],
        )

        # Third program: updates annotations for the matching object.
        program3 = Program(
            group_name="FRITZ_TEST_UPDATE_ANNOTATIONS", group_nickname="test3"
        )
        Filter(
            collection="ZTF_alerts",
            group_id=program3.group_id,
            filter_id=program3.filter_id,
            update_annotations=True,
            pipeline=[
                {"$match": {"objectId": "ZTF20aapcmur"}}
            ],  # there are 3 alerts in the test set for this oid
        )

    # clean up old Kafka logs
    log("Cleaning up Kafka logs")
    subprocess.run(["rm", "-rf", path_logs / "kafka-logs", "/tmp/zookeeper"])

    log("Starting up ZooKeeper at localhost:2181")

    # start ZooKeeper in the background
    cmd_zookeeper = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]

    with open(path_logs / "zookeeper.stdout", "w") as stdout_zookeeper:
        # Output of the start script goes to the log file.
        subprocess.run(
            cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    # take a nap while it fires up
    time.sleep(3)

    log("Starting up Kafka Server at localhost:9092")

    # start the Kafka server:
    cmd_kafka_server = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]

    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        # NOTE(review): the opened log file is not passed to the command
        # here, unlike the ZooKeeper step above - confirm that is intended.
        subprocess.run(cmd_kafka_server)

    # take a nap while it fires up
    time.sleep(3)

    # get kafka topic names with kafka-topics command
    cmd_topics = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
        "--zookeeper",
        config["kafka"]["zookeeper.test"],
        "-list",
    ]

    # One topic name per output line; the trailing empty entry is dropped.
    topics = (
        subprocess.run(cmd_topics, stdout=subprocess.PIPE)
        .stdout.decode("utf-8")
        .split("\n")[:-1]
    )
    log(f"Found topics: {topics}")

    # create a test ZTF topic for the current UTC date
    date = datetime.datetime.utcnow().strftime("%Y%m%d")
    topic_name = f"ztf_{date}_programid1_test"

    if topic_name in topics:
        # topic previously created? remove first
        cmd_remove_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--zookeeper",
            config["kafka"]["zookeeper.test"],
            "--delete",
            "--topic",
            topic_name,
        ]
        remove_topic = (
            subprocess.run(cmd_remove_topic, stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .split("\n")[:-1]
        )
        log(f"{remove_topic}")
        log(f"Removed topic: {topic_name}")
        time.sleep(1)

    # NOTE(review): ``topics`` is not refreshed after a removal, so a topic
    # that was just deleted is not explicitly re-created here - presumably
    # the broker auto-creates it on first produce; confirm.
    if topic_name not in topics:
        log(f"Creating topic {topic_name}")

        cmd_create_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--create",
            "--bootstrap-server",
            config["kafka"]["bootstrap.test.servers"],
            "--replication-factor",
            "1",
            "--partitions",
            "1",
            "--topic",
            topic_name,
        ]
        with open(
            os.path.join(config["path"]["logs"], "create_topic.stdout"), "w"
        ) as stdout_create_topic:
            subprocess.run(
                cmd_create_topic,
                stdout=stdout_create_topic,
                stderr=subprocess.STDOUT,
            )

    log("Starting up Kafka Producer")

    # spin up Kafka producer
    producer = Producer(
        {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
    )

    # small number of alerts that come with kowalski
    path_alerts = pathlib.Path("/app/data/ztf_alerts/20200202/")
    # grab some more alerts from gs://ztf-fritz/sample-public-alerts
    try:
        log("Grabbing more alerts from gs://ztf-fritz/sample-public-alerts")
        r = requests.get("https://www.googleapis.com/storage/v1/b/ztf-fritz/o")
        aa = r.json()["items"]
        # Only used below to report how many entries the listing contained.
        ids = [pathlib.Path(a["id"]).parent for a in aa if "avro" in a["id"]]
    except Exception as e:
        # Best effort: the test continues with the bundled alerts.
        log(
            "Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok"
        )
        log(f"{e}")
        ids = []

    # Copy the sample alerts next to the bundled ones (-n: skip existing).
    subprocess.run(
        [
            "gsutil",
            "-m",
            "cp",
            "-n",
            "gs://ztf-fritz/sample-public-alerts/*.avro",
            "/app/data/ztf_alerts/20200202/",
        ]
    )
    log(f"Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts")
    # push!
    for p in path_alerts.glob("*.avro"):
        with open(str(p), "rb") as data:
            # Trigger any available delivery report callbacks from previous produce() calls
            producer.poll(0)

            log(f"Pushing {p}")

            # Asynchronously produce a message, the delivery report callback
            # will be triggered from poll() above, or flush() below, when the message has
            # been successfully delivered or failed permanently.
            producer.produce(topic_name, data.read(), callback=delivery_report)

    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered.
    producer.flush()

    log("Starting up Ingester")

    # digest and ingest
    watchdog(obs_date=date, test=True)
    log("Digested and ingested: all done!")

    # shut down Kafka server and ZooKeeper
    time.sleep(20)

    log("Shutting down Kafka Server at localhost:9092")
    # stop the Kafka server:
    cmd_kafka_server_stop = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]

    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        subprocess.run(
            cmd_kafka_server_stop,
            stdout=stdout_kafka_server,
            stderr=subprocess.STDOUT,
        )

    log("Shutting down ZooKeeper at localhost:2181")
    cmd_zookeeper_stop = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]

    with open(
        os.path.join(config["path"]["logs"], "zookeeper.stdout"), "w"
    ) as stdout_zookeeper:
        subprocess.run(
            cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    log("Checking the ZTF alert collection states")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=True,
    )
    collection_alerts = config["database"]["collections"]["alerts_ztf"]
    collection_alerts_aux = config["database"]["collections"]["alerts_ztf_aux"]
    # Expected counts are fixed by the test data set.
    n_alerts = mongo.db[collection_alerts].count_documents({})
    assert n_alerts == 313
    n_alerts_aux = mongo.db[collection_alerts_aux].count_documents({})
    assert n_alerts_aux == 145

    if config["misc"]["broker"]:
        log("Checking that posting to SkyPortal succeeded")

        # check number of candidates that passed the first filter
        resp = requests.get(
            program.base_url + f"/api/candidates?groupIDs={program.group_id}",
            headers=program.headers,
            timeout=3,
        )

        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 88

        # check that the only candidate that passed the second filter (ZTF20aaelulu) got saved as Source
        resp = requests.get(
            program2.base_url + f"/api/sources?group_ids={program2.group_id}",
            headers=program2.headers,
            timeout=3,
        )

        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 1
        assert "sources" in result["data"]
        assert result["data"]["sources"][0]["id"] == "ZTF20aaelulu"
def _district_record(state, dd):
    """Flatten one district entry ``dd`` of ``state`` into the published record."""
    delta = dd['delta']
    return {
        'state': state,
        'district': dd['district'],
        'active': dd['active'],
        'confirmed': dd['confirmed'],
        'recovered': dd['recovered'],
        'deceased': dd['deceased'],
        'deltaConfirmed': delta['confirmed'],
        'deltaRecovered': delta['recovered'],
        'deltaDeceased': delta['deceased'],
        'notes': dd['notes'],
    }


def _risk_zone(active):
    """Map an active-case count to its zone key (boundaries 200 and 800)."""
    if active < 200:
        return 'low_risk_zone'
    if active < 800:
        return 'moderate_risk_zone'
    return 'high_risk_zone'


def _produce_json(producer, topic_name, key, value):
    """Produce one JSON-encoded key/value pair and serve delivery callbacks."""
    try:
        producer.produce(topic=topic_name,
                         value=json.dumps(value),
                         key=json.dumps(key),
                         on_delivery=fail)
    except BufferError:
        # The message is dropped; only the condition is logged.
        logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                     % len(producer))
    producer.poll(0)


def producer_trigger(raw_data, context):
    """Fetch district-wise COVID-19 statistics and publish them to Kafka.

    Two passes are made over the fetched data:

    1. every district is published as a flat record to ``district-data``;
    2. every district is classified by its active-case count and published
       to ``processed-data`` under a zone key (low: < 200, moderate:
       200-799, high: >= 800).

    Messages are keyed by ``{"state": ..., "district": ...}``.

    :param raw_data: trigger payload; not used.
    :param context: trigger context; not used.
    """
    district_stats_url = 'https://api.covid19india.org/v2/state_district_wise.json'
    bootstrap_servers = "localhost:9092"
    kafka_district_data_topic_name = "district-data"
    kafka_processed_data_topic_name = "processed-data"

    conf = {'bootstrap.servers': bootstrap_servers}
    producer = Producer(conf, logger=logger)

    # import raw district data (fetched once and used for both passes)
    states = requests.get(district_stats_url).json()

    # Pass 1: raw per-district records.
    for entry in states:
        state = entry['state']
        for dd in entry['districtData']:
            key = {'state': state, 'district': dd['district']}
            _produce_json(producer, kafka_district_data_topic_name,
                          key, _district_record(state, dd))

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    # Pass 2: records grouped under their risk zone.
    for entry in states:
        state = entry['state']
        # NOTE(review): the dict is shared by all districts of a state, so a
        # message also carries the latest record of each zone seen earlier in
        # the same state - kept as in the original; confirm it is intended.
        final_dict = {}
        for dd in entry['districtData']:
            key = {'state': state, 'district': dd['district']}
            # Counts of exactly 200 or 800 used to match no zone at all.
            final_dict[_risk_zone(dd['active'])] = _district_record(state, dd)
            _produce_json(producer, kafka_processed_data_topic_name,
                          key, final_dict)

    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
class SimPortClass:
    """Handle an ODC Port that forwards queued ODC events to a Kafka topic.

    The methods marked as mandatory below are called by the ODC PyPort
    C/C++ code, so their names and signatures must not change. ODC
    publishes helper functions to this module through ``odc``; this class
    uses ``odc.log``, ``odc.SetTimer``, ``odc.GetNextEvent`` and
    ``odc.GetEventQueueSize``.
    """

    # Worker methods. Thin wrappers that tag every log line with this
    # port's guid so ODC can attribute the message to the right port.
    def LogTrace(self, message):
        odc.log(self.guid, Trace, message)

    def LogError(self, message):
        odc.log(self.guid, Error, message)

    def LogDebug(self, message):
        odc.log(self.guid, Debug, message)

    def LogInfo(self, message):
        odc.log(self.guid, Info, message)

    def LogWarn(self, message):
        odc.log(self.guid, Warn, message)

    def LogCritical(self, message):
        odc.log(self.guid, Critical, message)

    # Mandatory methods that are called by ODC PyPort.
    def __init__(self, odcportguid, objectname):
        """Initialise counters and state; the Kafka producer is built in Config().

        :param odcportguid: identifier handed back to every ``odc.*`` call
            so ODC can work out which PyPort the call belongs to.
        :param objectname: name used for documentation/error messages only.
        """
        self.objectname = objectname
        self.guid = odcportguid
        self.Enabled = False
        self.MessageIndex = 0  # Total messages handed to the Kafka producer.
        self.EventQueueSize = 0
        self.QueueErrorState = 0  # 0 - no error, 1 - error (already notified).
        self.SendErrorState = 0  # As above.
        self.LastMessageIndex = self.MessageIndex
        self.StartTimeSeconds = time.time()
        self.timestart = 1.1  # Used for profiling, set up as a float.
        self.measuretimeus = 0
        self.measuretimeus2 = 0
        self.ConfigDict = {}  # Merged JSON configuration.
        self.LogInfo(
            "PyPortKafka - SimPortClass Init Called - {}".format(objectname))
        self.LogDebug("Python sys.path - {}".format(sys.path))
        self.producer = None
        return

    def timeusstart(self):
        """Start the profiling stopwatch (perf_counter includes sleep time)."""
        self.timestart = time.perf_counter()  # Float fractions of a second.

    def timeusstop(self):
        """Return whole microseconds elapsed since the last timeusstart()."""
        return int((time.perf_counter() - self.timestart) * 1000000)

    def _messagespersecond(self, DeltaSeconds):
        """Return the floored message rate since the last status report.

        Returns 0 when no measurable time has elapsed, so a coarse clock
        cannot trigger a ZeroDivisionError.
        """
        if DeltaSeconds <= 0:
            return 0
        return math.floor(
            (self.MessageIndex - self.LastMessageIndex) / DeltaSeconds)

    def minutetimermessage(self):
        """Log a status line and restart the rate-measurement window."""
        DeltaSeconds = time.time() - self.StartTimeSeconds
        self.LogError(
            "PyPortKafka status. Event Queue {}, Messages Processed - {}, Messages/Second - {}"
            .format(self.EventQueueSize, self.MessageIndex,
                    self._messagespersecond(DeltaSeconds)))
        self.StartTimeSeconds = time.time()
        self.LastMessageIndex = self.MessageIndex
        return

    def Config(self, MainJSON, OverrideJSON):
        """Parse the JSON configuration and create the Kafka producer.

        The JSON values are passed as strings (stripped of comments) and
        loaded into dictionaries. ``Inherits`` JSON entries (effectively
        include-file entries) are not handled.

        :param MainJSON: main configuration as a JSON string (may be empty).
        :param OverrideJSON: override configuration as a JSON string (may
            be empty); its root keys replace those of the main config.
        :raises KeyError: if "bootstrap.servers" or "Topic" is missing
            from the merged configuration.
        """
        Override = {}
        try:
            if len(MainJSON) != 0:
                self.ConfigDict = json.loads(MainJSON)
            if len(OverrideJSON) != 0:
                Override = json.loads(OverrideJSON)
        except Exception:
            # Best effort: report the problem and leave the port unconfigured
            # (no producer) instead of raising into the ODC caller.
            self.LogError("Exception on parsing JSON Config data - {}".format(
                sys.exc_info()[0]))
            return

        self.LogDebug("JSON Config strings Parsed")

        # Only root JSON values can be overridden: update() does not recurse
        # into sub-dictionaries, so changing one nested value means
        # rewriting the whole root key it lives under.
        self.ConfigDict.update(Override)

        kafkaserver = self.ConfigDict["bootstrap.servers"]
        self.topic = self.ConfigDict["Topic"]

        # Producer settings; for everything that can be passed see
        # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        #  - request.required.acks: number of acknowledgements required
        #    before a message counts as written; 0 is fire and forget.
        #  - delivery.report.only.error: only failed deliveries generate a
        #    report (measured as roughly 70k/sec -> 74k/sec).
        #  - message.send.max.retries: a message is not dropped until this
        #    many attempts have been made.
        #  - max.in.flight.requests.per.connection: kept high for throughput.
        # Background on how to lose (and how not to lose) messages:
        # https://jack-vanlightly.com/blog/2018/9/14/how-to-lose-messages-on-a-kafka-cluster-part1
        conf = {
            'bootstrap.servers': kafkaserver,
            'client.id': 'OpenDataCon',
            'delivery.report.only.error': True,
            'message.send.max.retries': 10000000,
            'request.required.acks': 0,
            'max.in.flight.requests.per.connection': 100
        }
        self.producer = Producer(conf)
        return

    def Operational(self):
        """Start the timer cycles; called once by ODC when it is ready for us.

        The timers are re-armed from TimerHandler, so this only has to
        start them.
        """
        self.LogDebug("Port Operational - {}".format(
            datetime.now().isoformat(" ")))
        odc.SetTimer(self.guid, 1, 500)  # Start the event pump cycle.
        odc.SetTimer(self.guid, 2, 10000)  # First status message after 10 s.
        return

    def Enable(self):
        """Mark the port as enabled."""
        self.LogDebug("Enabled - {}".format(datetime.now().isoformat(" ")))
        # Same attribute that __init__ creates (previously wrote 'enabled').
        self.Enabled = True
        return

    def Disable(self):
        """Mark the port as disabled."""
        self.LogDebug("Disabled - {}".format(datetime.now().isoformat(" ")))
        self.Enabled = False
        return

    # Not used: TimerHandler calls produce() without a delivery callback.
    def delivery_report(self, err, msg):
        """Log delivery errors once and log again when they clear.

        Called once for each message produced to indicate the delivery
        result; triggered by poll() or flush().
        """
        if err is not None:
            if self.SendErrorState == 0:
                self.LogError("Kafka Send Message Error - {} [{}] - {}".format(
                    msg.topic(), msg.partition(), err))
                self.SendErrorState = 1
        else:
            if self.SendErrorState == 1:
                self.LogError(
                    "Kafka Send Message Error Cleared - {} [{}]".format(
                        msg.topic(), msg.partition()))
                self.SendErrorState = 0

    def EventHandler(self, EventType, Index, Time, Quality, Payload, Sender):
        """Handle an event delivered directly by ODC.

        Arguments: EventType (string), Index (int), Time (msSinceEpoch),
        Quality (string), Payload (string), Sender (string). There is no
        callback available; ODC expects this method to return without delay.

        :return: always True (translated by ODC into CommandStatus::SUCCESS):
            the event counts as processed even though it is not passed to
            Kafka from here.
        """
        if EventType == "ConnectState":
            return True

        # Events are expected to arrive through the ODC queue that
        # TimerHandler drains, not through this direct path.
        self.LogError("Events must be queued {}".format(EventType))
        return True

    def millisdiff(self, starttimedate):
        """Return the milliseconds (float) elapsed since ``starttimedate``."""
        dt = datetime.now() - starttimedate
        ms = (dt.days * 24 * 60 * 60 +
              dt.seconds) * 1000 + dt.microseconds / 1000.0
        return ms

    def TimerHandler(self, TimerId):
        """Service the timers started in Operational().

        Called by the ASIO handler system with the id of the timer that
        fired. Timer 1 drains the ODC event queue into Kafka and re-arms
        itself; timer 2 logs a status line every 10 seconds.
        """
        if self.producer is not None:
            self.producer.poll(0)  # Do any waiting processing, but don't wait!

        if TimerId == 1:
            MaxMessageCount = 5000
            longwaitmsec = 100
            shortwaitmsec = 5
            # EventCount counts fetch attempts (starting from 1) and drives
            # the batch limit and timer back-off; ProducedCount counts the
            # messages actually handed to the producer.
            EventCount = 1
            ProducedCount = 0
            # Profiling accumulators: time spent fetching and producing (us).
            self.measuretimeus = 0
            self.measuretimeus2 = 0

            if self.producer is not None:
                # Pull events from the queue until it is empty or the batch
                # limit is reached; the Kafka library sends them in the
                # background.
                while EventCount < MaxMessageCount:
                    EventCount += 1

                    self.timeusstart()
                    JsonEventstr, empty = odc.GetNextEvent(self.guid)
                    self.measuretimeus += self.timeusstop()

                    if empty:
                        break

                    try:
                        self.timeusstart()
                        # With a huge retry count the producer buffer just
                        # fills up when the cluster cannot keep up, and
                        # produce() then raises BufferError.
                        self.producer.produce(self.topic, value=JsonEventstr)
                        self.measuretimeus2 += self.timeusstop()
                        ProducedCount += 1

                        if self.QueueErrorState == 1:
                            self.LogError(
                                "Kafka Producer Queue Recovered - NOT full ({} messages awaiting delivery)"
                                .format(len(self.producer)))
                            self.QueueErrorState = 0

                    except BufferError:
                        # NOTE: the event just fetched is lost here; it is
                        # not put back on the ODC queue.
                        if self.QueueErrorState == 0:
                            self.LogError(
                                "Kafka Producer Queue is full ({} messages awaiting delivery)"
                                .format(len(self.producer)))
                            self.QueueErrorState = 1
                        break

                    if EventCount % 100 == 0:
                        self.producer.poll(0)  # Serve callbacks, don't wait.

            self.EventQueueSize = odc.GetEventQueueSize(self.guid)

            # Only count messages that were really produced, so the
            # status/REST statistics do not grow while the queue is idle.
            self.MessageIndex += ProducedCount

            # A full batch means more events are probably waiting, so come
            # back quickly; otherwise use the long wait.
            if EventCount < MaxMessageCount:
                odc.SetTimer(self.guid, 1, longwaitmsec)
            else:
                odc.SetTimer(self.guid, 1, shortwaitmsec)

        if TimerId == 2:
            self.minutetimermessage()
            odc.SetTimer(self.guid, 2, 10000)  # Run again in 10 seconds.

        if self.producer is not None:
            self.producer.poll(0)  # Do any waiting processing, but don't wait!

        return

    def RestRequestHandler(self, url, content):
        """Answer a request from the shared PythonPort REST interface.

        Called whenever the REST interface (a single interface for all
        PythonPorts) receives a request routed to this port.

        :param url: request description; a status report is produced when
            it contains "GET".
        :param content: request body (unused).
        :return: the response as a JSON string; "{}" for non-GET requests.
        """
        self.LogDebug("RestRequestHander: {}".format(url))

        Response = {}
        if "GET" in url:
            DeltaSeconds = time.time() - self.StartTimeSeconds
            Response[
                "Status"] = "PyPortKafka Processing is running. Messages Processed - {}, Messages/Second - {}".format(
                    self.MessageIndex, self._messagespersecond(DeltaSeconds))
            # Reading the status restarts the rate-measurement window.
            self.StartTimeSeconds = time.time()
            self.LastMessageIndex = self.MessageIndex

        return json.dumps(Response)
class CallReport:
    """Handle the first of the two reports a CRM system may send per call.

    The final report is handled by FinalCallReport. Reports are consumed
    from one Kafka topic and forwarded to the SyncReport service through
    another.

    NOTE(review): ``abstractmethod`` is applied below, but this class
    declares no ABC base here, so nothing enforces the overrides.
    """

    def __init__(self, group=None, consumer_topic=None, producer_topic=None):
        """Create the producer and remember the consumer settings.

        :param group: consumer group id used by start().
        :param consumer_topic: topic start() subscribes to.
        :param producer_topic: topic send_call_report() publishes to.
        """
        producer_settings = {
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": ""
        }
        self._producer = Producer(producer_settings)

        self._group = group
        self._consumer_topic = consumer_topic
        self._consumer = None  # Created by start().
        self._producer_topic = producer_topic

    @abstractmethod
    def start(self):
        """Consume call reports forever and forward each one.

        Example implementation for the stream case. The offset is
        committed before the report is forwarded.
        """
        consumer_settings = {
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': self._group,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
        }
        self._consumer = Consumer(consumer_settings)
        self._consumer.subscribe([self._consumer_topic])

        while True:
            msg = self._consumer.poll(0.1)

            if msg is None:
                # Nothing arrived within the poll timeout.
                continue

            if not msg.error():
                # Received a regular message.
                self._consumer.commit()
                self.send_call_report(msg.value())
                continue

            if msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(
                    msg.topic(), msg.partition()))
                continue

            logging.error('Error occurred: {}'.format(msg.error().str()))

    @abstractmethod
    def manage_call_report(self, call_report):
        """Reformat a call report; meant to be overridden per dialer setup.

        The goal is to convert the call report into the JSON format for
        OptimalQ's post request and then call
        post_call_report(sample_pool_id, call_report).

        :param call_report: the report to convert.
        :return: None in this base implementation.
        """
        pass

    def send_call_report(self, report):
        """Send a call report to the SyncReport service via Kafka.

        Any failure is logged and swallowed, so this never raises.

        :param report: the report; it is JSON-serialised before sending.
        :return: None
        """
        try:
            # Rebinding ``report`` means the error log below shows the
            # serialised form whenever serialisation itself succeeded.
            report = json.dumps(report)
            # NOTE(review): ``KafkaUtils.self.delivery_report`` looks
            # unusual - confirm the callback path against KafkaUtils.
            self._producer.produce(
                self._producer_topic,
                report,
                callback=KafkaUtils.self.delivery_report)
            self._producer.poll(0)
            logging.info("Sent call report to SyncReport")
        except Exception as ex:
            failure = 'Exception while trying to send message {} to topic {} - {}'.format(
                report, self._producer_topic, ex)
            logging.exception(failure)

    def terminate(self):
        """Flush every message still queued in the producer."""
        self._producer.flush()
def main(args):
    """Run a consume-transform-produce loop inside Kafka transactions.

    Messages are read from one partition of the input topic, transformed
    with process_input() and produced to the output topic. Consumer
    offsets are committed through the producer's transaction, which is
    what provides exactly-once semantics.

    :param args: object with ``brokers``, ``group_id``, ``input_topic``,
        ``input_partition`` and ``output_topic`` attributes.
    """
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    consumer_settings = {
        'bootstrap.servers': brokers,
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Do not advance committed offsets outside of the transaction.
        # Consumer offsets are committed along with the transaction
        # using the producer's send_offsets_to_transaction() API.
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    }
    consumer = Consumer(consumer_settings)

    # Prior to KIP-447 being supported each input partition requires
    # its own transactional producer, so this example assigns a single
    # partition instead of subscribing. A more complex alternative is to
    # dynamically create a producer per partition in subscribe's
    # rebalance callback.
    consumer.assign([TopicPartition(input_topic, input_partition)])

    producer = Producer({
        'bootstrap.servers': brokers,
        'transactional.id': 'eos-transactions.py'
    })

    def commit_with_offsets():
        """Add the consumer's position to the transaction and commit it.

        Committing inputs and outputs in the same transaction is what
        provides EOS.
        """
        producer.send_offsets_to_transaction(
            consumer.position(consumer.assignment()),
            consumer.consumer_group_metadata())
        producer.commit_transaction()

    producer.init_transactions()
    producer.begin_transaction()

    finished = set()  # (topic, partition) pairs currently at EOF.
    pending = 0  # Messages produced in the open transaction.

    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # Serve delivery reports from previous produce() calls.
        producer.poll(0)

        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        source = (msg.topic(), msg.partition())

        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                finished.add(source)
                print("=== Reached the end of {} [{}] at {}====".format(
                    source[0], source[1], msg.offset()))
                if len(finished) == len(consumer.assignment()):
                    print("=== Reached end of input ===")
                    break
            # Any error event (EOF or otherwise) carries no payload.
            continue

        # A real message arrived, so this partition is no longer at EOF.
        finished.discard(source)

        pending += 1
        processed_key, processed_value = process_input(msg)
        producer.produce(output_topic,
                         processed_value,
                         processed_key,
                         on_delivery=delivery_report)

        if pending % 100 != 0:
            continue

        print(
            "=== Committing transaction with {} messages at input offset {} ==="
            .format(pending, msg.offset()))
        commit_with_offsets()

        # Open the next transaction and restart the batch counter.
        producer.begin_transaction()
        pending = 0

    print("=== Committing final transaction with {} messages ===".format(
        pending))
    commit_with_offsets()

    consumer.close()
print("Invalid name: " + file_name); exit(1) capMsg = sumM3Message() capMsg.topic="m3Capture" capMsg.copy_m3name_values(m3n) capMsg.fpath = file_name captureMessage = capMsg.to_string() print("Message: " + captureMessage) # Send the message on the "m3Summary" topic try: #define the producer configuration p = Producer({'bootstrap.servers': sys.argv[1]}) print("Producer started") # Produce a message p.produce(capMsg.topic, captureMessage.encode(encoding='utf-8', errors='strict'), callback=captureAcked) print("Message produced") # Wait up to 1 second for events. Callbacks will be invoked during # this method call if the message is acknowledged. p.poll(1) print("Poll done") except Exception: traceback.print_exc() print("Failed to produce the M3 summary message!") exit(1) # flush and exit after the message is normally sent. print("Flushing") p.flush() exit(0)
def acked(err, msg):
    """Delivery report handler called on successful or failed delivery.

    Increments the module-level ``delivered_records`` counter for every
    successfully delivered message and prints the outcome.

    :param err: delivery error, or None when the message was delivered.
    :param msg: the message the report refers to.
    """
    global delivered_records
    if err is not None:
        print("Failed to deliver message: {}".format(err))
    else:
        delivered_records += 1
        print("Produced record to topic {} partition [{}] @ offset {}".format(
            msg.topic(), msg.partition(), msg.offset()))


# Load the records to publish; the context manager closes the file even
# if parsing fails.
with open('2021-01-19.json', 'r') as f:
    data = json.load(f)

# Publish at most the first 1000 records. Slicing (rather than indexing
# 0..999) keeps this working when the file holds fewer records.
for record in data[:1000]:
    record_key = "alice"
    record_value = json.dumps(record)
    print("Producing record: {}\t{}".format(record_key, record_value))
    producer.produce(topic,
                     key=record_key,
                     value=record_value,
                     on_delivery=acked)
    # poll() serves delivery reports (on_delivery) from previous
    # produce() calls.
    producer.poll(0.25)

# Block until every queued message has been delivered or has failed.
producer.flush()

print("{} messages were produced to topic {}!".format(
    delivered_records, topic))
class CoinbasePro(cbpro.WebsocketClient):
    """Websocket client that republishes Coinbase Pro messages to Kafka."""

    def on_open(self):
        """Set the feed parameters and create the Kafka producer."""
        self.url = "wss://ws-feed.pro.coinbase.com/"
        self.products = [
            "BTC-USD", "ETH-USD", "LTC-USD", "BCH-USD", 'ETH-BTC', 'LTC-BTC'
        ]
        self.type = 'ticker'

        producer_settings = {
            'bootstrap.servers': ','.join(KAFKA_NODES),
            'default.topic.config': {
                'request.required.acks': 'all'
            }
        }
        self.producer = Producer(producer_settings)
        print('Established Socket Connection')

    def on_message(self, msg):
        """Publish best bid/ask of a message to the 'asks' topic.

        Messages without a 'time' field are ignored.

        :param msg: decoded websocket message (a mapping).
        """

        def delivery_report(err, k_msg):
            # Invoked from poll() or flush() with the delivery outcome.
            if err is None:
                print(('Message delivered to {} [{}] - {}'.format(
                    k_msg.topic(), k_msg.partition(), msg['product_id'])))
                return
            print(('Message delivery failed: {}'.format(err)))

        if 'time' not in msg:
            return

        asset_pair = msg['product_id']
        # The timestamp is the local UTC time of processing, not the
        # message's own 'time' value.
        data = {
            'bids': (msg['best_bid']),
            'len_bids': 1,
            'asks': (msg['best_ask']),
            'len_asks': 1,
            'product': asset_pair,
            'time': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+0000"),
            'market': "Coinbase"
        }
        payload = json.dumps(data).encode('utf-8')

        # Serve pending delivery reports, then push to the kafka topic.
        self.producer.poll(0)
        self.producer.produce('asks',
                              payload,
                              key=asset_pair,
                              callback=delivery_report)

    def _connect(self):
        """Open the websocket and send the subscription for ``self.type``."""
        # Normalise products to a list, defaulting to BTC-USD.
        if self.products is None:
            self.products = ["BTC-USD"]
        elif not isinstance(self.products, list):
            self.products = [self.products]

        # Drop a single trailing slash from the URL.
        if self.url[-1] == "/":
            self.url = self.url[:-1]

        self.ws = create_connection(self.url)
        self.stop = False

        # "heartbeat" and 'ticker' subscribe to the channel of the same
        # name; every other type uses the plain product subscription.
        if self.type == "heartbeat":
            channel_name = "heartbeat"
        elif self.type == 'ticker':
            channel_name = "ticker"
        else:
            channel_name = None

        if channel_name is None:
            sub_params = {'type': 'subscribe', 'product_ids': self.products}
        else:
            sub_params = {
                'type': 'subscribe',
                "channels": [{
                    "name": channel_name,
                    "product_ids": self.products
                }]
            }
        self.ws.send(json.dumps(sub_params))
class ConfluentKafkaMsgQAPI:
    """API for posting to and reading from a Kafka queue.

    The broker and topic come from the ``broker_name_key`` and
    ``topic_key`` environment variables. The producer and consumer are
    created lazily by enqueue() and dequeue().
    """

    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        """Read the environment and initialise the (unconnected) state.

        Blocks until both environment variables are available.

        :param is_producer: True when the instance will be used to enqueue.
        :param is_consumer: True when the instance will be used to dequeue.
        :param perform_subscription: stored on the instance.
        :param thread_identifier: label used in consumer log messages.
        """
        if not is_producer and not is_consumer:
            # Only a warning: construction continues regardless.
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()

    def __read_environment_variables(self):
        """Poll the OS environment until broker name and topic are both set.

        :return:
        """
        while self.broker_name is None or \
                self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    @staticmethod
    def delivery_callback(err, msg):
        """Log the delivery outcome of one produced message.

        Per-message delivery callback (triggered by poll() or flush())
        when a message has been successfully delivered or has permanently
        failed delivery (after retries).
        """
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """Create the producer instance if it does not exist yet.

        :return: True when a producer instance is available afterwards,
            False when creating it failed (after a 5 second back-off).
        """
        if self.producer_instance is not None:
            # Already connected (this case used to report False).
            return True

        try:
            self.producer_conf = {'bootstrap.servers': self.broker_name}
            self.producer_instance = Producer(**self.producer_conf)
        except Exception:
            print("Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            time.sleep(5)
            return False

        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Successfully "
            "connected to broker_name={}".format(self.broker_name))
        return True

    def enqueue(self, filename):
        """Post a filename as a message to the configured Kafka topic.

        :param filename: non-empty string to post (UTF-8 encoded).
        :return: True when the message was handed to the producer, False
            otherwise. Delivery itself is asynchronous and is reported
            through delivery_callback.
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status

        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic,
                                                      timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                # The metadata request failed; ask the admin client to
                # create the topic. is_topic_created stays False, so the
                # check is repeated on the next call.
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message; the delivery report callback
        # is triggered from poll() or flush() below once the message has
        # been delivered or has failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename, self.broker_name, self.topic)
            logging_to_console_and_syslog(event)

        # Serve the delivery callback queue.
        # NOTE: since produce() is asynchronous this poll() call will
        # most likely not serve the callback for the message just produced.
        self.producer_instance.poll(timeout=0.1)

        # Give outstanding messages a short, bounded time to be delivered.
        self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_settings(self):
        """Return the consumer configuration (the group id is the topic).

        See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
        return {
            'bootstrap.servers': self.broker_name,
            'group.id': self.topic,
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }

    def __consumer_connect_to_broker(self):
        """Create the consumer instance if it does not exist yet.

        :return: True when a consumer instance is available afterwards,
            False when creating it failed (after a 5 second back-off).
        """
        if self.consumer_instance is not None:
            # Already connected (this case used to report False).
            return True

        try:
            logging_to_console_and_syslog(
                "Consumer:{}:Trying to connect to broker_name={}".format(
                    self.thread_identifier, self.broker_name))
            # Hint: try debug='fetch' to generate some log messages.
            self.consumer_instance = Consumer(self.__consumer_settings())
        except Exception:
            logging_to_console_and_syslog(
                "Consumer:{}:Exception in user code:".format(
                    self.thread_identifier))
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)
            time.sleep(5)
            return False

        # Only report success when the consumer was really created; this
        # message used to be logged after a failure as well.
        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))
        return True

    @staticmethod
    def print_assignment(consumer, partitions):
        """Print the partitions assigned to ``consumer`` (on_assign hook)."""
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        """Poll the topic once and return the next message, if any.

        The consumer is created and subscribed on first use. After a
        message has been read the consumer is closed and discarded, so
        the next call starts with a fresh one; when nothing was read it
        is kept for the next call.

        :return: the message value decoded as UTF-8, or None when no
            message arrived within one second or the message carried an
            error.
        """
        if not self.consumer_instance:
            self.consumer_instance = Consumer(self.__consumer_settings())
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)

        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None

        logging_to_console_and_syslog(
            '%% %s [%d] at offset %d with key %s:\n' %
            (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
        msg = msg.value().decode('utf8')
        logging_to_console_and_syslog("msg.value()={}".format(msg))
        self.consumer_instance.close()
        self.consumer_instance = None
        return msg

    def cleanup(self):
        """Close and discard the consumer instance, if one exists."""
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None
# 讓我們產生假的Employee資料 employee = Employee(id_='emp_id_' + fakeNumber, first_name='fn_' + fakeNumber, last_name='ln_' + fakeNumber, dept_id='dept_id_' + str(i % 10), hire_date=epoch_now_mills(), wage=float(i), sex=True) # 轉換成JSON字串 employeeJson = json.dumps(employee.__dict__) # 送出訊息 producer.produce(topicName, key=str(i), value=employeeJson, callback=delivery_callback) producer.poll(0) # 呼叫poll來讓client程式去檢查內部的Buffer, 並觸發callback if i % 10000 == 0: print('Send {} messages'.format(i)) time_spend = int(round(time.time() * 1000)) - time_start print('Send : ' + str(msgCount) + ' messages to Kafka') print('Total spend : ' + str(time_spend) + ' millis-seconds') print('Throughput : ' + str(msgCount / time_spend * 1000) + ' msg/sec') except BufferError as e: # 錯誤處理 sys.stderr.write( '%% Local producer queue is full ({} messages awaiting delivery): try again\n' .format(len(producer))) except Exception as e: print(e) # 步驟5. 確認所有在Buffer裡的訊息都己經送出去給Kafka了
class BMPNodes(object):
    """Track OpenBMP routers and move their RIB state between Kafka and Redis.

    Construction validates its arguments, connects to Redis (flushing it),
    and starts four daemon threads: three Kafka consumers (router, peer and
    unicast-prefix topics) and one Redis pub/sub listener.  The listener
    chains the workers ``adjRibPolicyWorker`` -> ``localRibWorker`` ->
    ``kafkaWorker`` through the ``AdjInRib`` / ``AdjInRibPP`` / ``localRib``
    channels.

    :param bootstrap_server: Kafka ``bootstrap.servers`` value.
    :param redishost: hostname of the Redis server.
    :raises ValueError: if either argument is ``None``.
    """

    def __init__(self, bootstrap_server=None, redishost=None):
        # Validate both arguments before touching Redis, so a doomed
        # constructor call never flushes the database.
        if redishost is None:
            raise ValueError("Redis Hostname not specified, bailing out")
        if bootstrap_server is None:
            raise ValueError("Bootstrap server not specified")

        self.nodes = {}
        self.redis = redis.StrictRedis(host=redishost)
        self.redis.flushall()
        self.pubsub = self.redis.pubsub()
        self.routerevent = threading.Event()
        self.peerevent = threading.Event()
        self.threadList = []
        self.poisonpillq = Queue.Queue()
        self.peer_consumer = None
        self.router_consumer = None
        self.prefix_consumer = None
        self.rib_producer = None
        self.bootstrap_server = bootstrap_server

        # The dispatch tables must exist before any worker thread runs:
        # the threads read them as soon as the first message arrives.
        self.dispatch = {'init': self.add_router, 'term': self.delete_router}
        self.redis_dispatch = {
            'AdjInRib': self.adjRibPolicyWorker,
            'AdjInRibPP': self.localRibWorker,
            'localRib': self.kafkaWorker
        }

        for fn in [
                self.capture_router_msg, self.capture_peer_msg,
                self.capture_prefix_msg, self.redis_listener
        ]:
            thread = threading.Thread(target=fn, args=())
            self.threadList.append(thread)
            thread.daemon = True  # Daemonize thread
            thread.start()  # Start the execution

    def get_nodes(self):
        """Return a two-way map between router hash and ``"name:ipaddr"``."""
        nodeset = {}
        for node, rtr in self.nodes.items():
            label = str(rtr.name) + ':' + str(rtr.ipaddr)
            nodeset[label] = node
            # Also provide the reverse mapping
            nodeset[node] = label
        return nodeset

    def serialize(self):
        """Return ``{router_hash: node.serialize()}`` for every known router."""
        return dict(
            (node, rtr.serialize()) for node, rtr in self.nodes.items())

    class PoisonPillException(Exception):
        """Raised inside a worker thread to unwind it after a "quit" pill."""
        pass

    def consumer_cleanup(self):
        """Ask every worker thread to stop and wait for all of them."""
        logger.debug("Cleaning up, exiting the active threads")
        for thread in self.threadList:
            self.poisonpillq.put("quit")

        # The redis listener will need the poisonpill channel publish
        self.redis.publish('poisonpill', "quit")

        for thread in self.threadList:
            logger.debug("Waiting for %s to finish..." % (thread.name))
            thread.join()
        return

    def process_msg(self, router_msg):
        """Dispatch each router message on its ``action`` field.

        Messages whose action is ``first`` are ignored; any other action
        must be a key of ``self.dispatch``.
        """
        for msg in router_msg:
            action = str(msg['action'])
            if action != 'first':
                self.dispatch[action](msg)
            else:
                logger.debug("Ignoring action=first in openbmp router message")

    def add_router(self, router_msg):
        """Create a ``Node`` for a router that is not yet known.

        ``name`` and ``ip_address`` are popped from ``router_msg``; the
        remaining fields are handed to the node as ``data``.
        """
        router_hash = str(router_msg['hash'])
        if router_hash not in self.nodes:
            # Create the router object
            node = Node(node_hash=router_msg['hash'],
                        name=router_msg.pop('name'),
                        ipaddr=router_msg.pop('ip_address'),
                        data=router_msg)
            # Add to existing router set
            self.nodes[router_hash] = node
        else:
            logger.debug(
                "Received an add event for an existing peer. Strange, but ignore"
            )

    def delete_router(self, router_msg):
        """Forget a known router and delete its hash key from Redis."""
        router_hash = str(router_msg['hash'])
        if router_hash in self.nodes:
            # Delete the particular router from the current router set
            del self.nodes[router_hash]
            # Delete the router hash from redis
            self.redis.delete(router_hash)
        else:
            logger.debug(
                "Received a del event for a non-existent peer, ignore")

    def update_redis(self, channel=None):
        """Write the current router state to Redis, then optionally publish.

        :param channel: key of ``self.redis_dispatch``; when given, a
            notification is published on that channel after the write.
        """
        nodeset = self.get_nodes()
        if nodeset:
            self.redis.hmset("routers", nodeset)
            for node in self.nodes.keys():
                self.redis.hmset(node, self.nodes[node].serialize())

        if channel:
            # Publish message to redis Listeners
            self.redis.publish(
                channel, "Publish to " +
                str(self.redis_dispatch[channel].__name__) + " worker")

    def redis_listener(self):
        """Thread body: run the worker registered for each pub/sub event.

        Returns when ``"quit"`` is received or when any worker raises.
        """
        self.pubsub.subscribe(
            ['AdjInRib', 'AdjInRibPP', 'localRib', 'poisonpill'])
        try:
            while True:
                for item in self.pubsub.listen():
                    logger.info("Received Redis event")
                    if item['data'] == "quit":
                        self.pubsub.unsubscribe()
                        logger.debug(
                            "unsubscribed and finished redis pubsub listener")
                        raise self.PoisonPillException
                    else:
                        if item['channel'] in self.redis_dispatch:
                            self.redis_dispatch[item['channel']]()
        except self.PoisonPillException:
            return
        except Exception as e:
            logger.debug("Error while listening to redis events")
            logger.debug("Error is" + str(e))
            return

    def adjRibPolicyWorker(self):
        """Apply per-node policies to AdjInRib, then signal ``AdjInRibPP``."""
        logger.debug("Received an AdjInRib event")
        # walk through the nodes and apply available policies
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and apply policies
                self.nodes[node].adjInRibPP.process_adjInRib(node, self.redis)

        self.update_redis('AdjInRibPP')

    def localRibWorker(self):
        """Run per-node path selection, then signal ``localRib``."""
        # walk through the nodes and apply available path selection algorithms
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and do path selection
                self.nodes[node].localRib.process_adjInRibPP(node, self.redis)

        self.update_redis('localRib')

    # Optional per-message delivery callback (triggered by poll() or flush())
    # during the rib stream to kafka when a message has been successfully delivered
    # or permanently failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        """Log the delivery outcome of one produced message."""
        if err:
            logger.debug('%% Message failed delivery: %s\n' % err)
        else:
            logger.debug('%% Message delivered to %s [%d]\n' %
                         (msg.topic(), msg.partition()))

    def kafkaWorker(self):
        """Push every node's localRib routes from Redis to Kafka.

        Each node's routes go to the topic named by that node's hash.
        This is meant to serve as a streaming set of routes to router
        clients which will be kafka consumers.  This is NOT a way to resync
        if the router dies or the router client disconnects - for that, sync
        with the redis database first and then start listening to fresh
        messages from Kafka for route events.
        """
        self.rib_producer = Producer(
            {'bootstrap.servers': self.bootstrap_server})

        if self.get_nodes():
            for node in self.nodes.keys():
                topic = self.nodes[node].hash

                # fetch localRib routes from Redis, push to Kafka bus.
                # A node without a stored localRib is skipped instead of
                # letting literal_eval raise and kill the listener thread.
                raw_rib = self.redis.hget(node, 'localRib')
                localRib = ast.literal_eval(raw_rib) if raw_rib else None
                if not localRib:
                    continue

                for route in localRib:
                    logger.debug(route)
                    payload = json.dumps(route)
                    try:
                        self.rib_producer.produce(
                            topic,
                            value=payload,
                            callback=self.delivery_callback)
                        self.rib_producer.poll(0)
                    except BufferError:
                        logger.debug(
                            '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                            % len(self.rib_producer))
                        # Poll first to block until there is queue space
                        # available.  poll() takes SECONDS, so the interval
                        # is passed through unscaled.
                        self.rib_producer.poll(RIB_PRODUCER_WAIT_INTERVAL)

                        # Now try again when there is hopefully some free space on the queue
                        self.rib_producer.produce(
                            topic,
                            value=payload,
                            callback=self.delivery_callback)

        # Wait until all messages have been delivered
        logger.debug('%% Waiting for %d deliveries\n' %
                     len(self.rib_producer))
        self.rib_producer.flush()

    def _make_consumer(self):
        """Build a Kafka consumer with a time-stamped group and client id."""
        return Consumer({
            'bootstrap.servers': self.bootstrap_server,
            'group.id': 'bmp_client' + str(time.time()),
            'client.id': 'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

    def _check_poison_pill(self):
        """Raise PoisonPillException if a "quit" pill is waiting."""
        try:
            pill = self.poisonpillq.get_nowait()
        except Queue.Empty:
            return
        if isinstance(pill, str) and pill == "quit":
            raise self.PoisonPillException

    @staticmethod
    def _log_or_raise_kafka_error(msg):
        """Log an end-of-partition event; raise KafkaException otherwise."""
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            logger.debug('%% %s [%d] reached end at offset %d\n' %
                         (msg.topic(), msg.partition(), msg.offset()))
        else:
            raise KafkaException(msg.error())

    def capture_router_msg(self):
        """Thread body: consume router messages and update the router set.

        ``routerevent`` is set whenever a poll returns nothing and cleared
        after a router message has been processed.
        """
        topics = ['openbmp.parsed.router']
        logger.debug("Connecting to Kafka to receive router messages")
        self.router_consumer = self._make_consumer()
        self.router_consumer.subscribe(topics)

        try:
            while True:
                msg = self.router_consumer.poll(timeout=1.0)
                self._check_poison_pill()

                if msg is None:
                    self.routerevent.set()
                    continue
                if msg.error():
                    self._log_or_raise_kafka_error(msg)
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.router":
                    router = Router(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(router.to_json_pretty())
                    router_msg = yaml.safe_load(router.to_json_pretty())
                    logger.debug("Calling process msg for Router messages")
                    # Use this instance, not a module-level global that may
                    # not be bound yet while the constructor is running.
                    self.process_msg(router_msg)
                    # update redis
                    self.update_redis()
                    self.routerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the router message consumer")
            self.router_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occurred while listening for router messages")
            logger.debug("Error is " + str(e))
            self.router_consumer.close()
            return

    def capture_peer_msg(self):
        """Thread body: consume peer messages and hand them to their router.

        A peer message for an unknown router waits on ``routerevent`` and
        retries until that router appears.
        """
        topics = ['openbmp.parsed.peer']
        logger.info("Connecting to Kafka to receive peer messages")
        self.peer_consumer = self._make_consumer()
        self.peer_consumer.subscribe(topics)

        try:
            while True:
                msg = self.peer_consumer.poll(timeout=1.0)
                self._check_poison_pill()

                if msg is None:
                    self.peerevent.set()
                    continue
                if msg.error():
                    self._log_or_raise_kafka_error(msg)
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.peer":
                    peer = Peer(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(peer.to_json_pretty())
                    peer_msg = yaml.safe_load(peer.to_json_pretty())
                    for entry in peer_msg:
                        router_hash = str(entry['router_hash'])
                        # NOTE: retries indefinitely until the router shows up.
                        while router_hash not in self.nodes:
                            logger.debug(
                                "Received peer message for currently unknown Router, hash="
                                + router_hash)
                            logger.debug(
                                "Let's wait for router_msg event to be set")
                            self.routerevent.wait(PEER_MSG_DAMPENING_TIMER)
                        self.nodes[router_hash].process_msg(entry)

                    # Go ahead and update Redis
                    self.update_redis()
                    self.peerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the peer message consumer")
            self.peer_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occured while listening to peer messages from Kafka"
            )
            logger.debug("Error is " + str(e))
            # Close the consumer this thread owns (not the router consumer).
            self.peer_consumer.close()
            return

    def capture_prefix_msg(self):
        """Thread body: consume unicast-prefix messages into each AdjInRib.

        A prefix message for an unknown router waits on ``peerevent`` and
        retries until that router appears.
        """
        topics = ['openbmp.parsed.unicast_prefix']
        logger.debug("Connecting to Kafka to receive prefix messages")
        self.prefix_consumer = self._make_consumer()
        self.prefix_consumer.subscribe(topics)

        try:
            while True:
                msg = self.prefix_consumer.poll(timeout=1.0)
                self._check_poison_pill()

                if msg is None:
                    continue
                if msg.error():
                    self._log_or_raise_kafka_error(msg)
                    continue

                # Process the message
                m = Message(msg.value())  # Gets body of kafka message.
                t = msg.topic()  # Gets topic of kafka message.
                m_tag = t.split('.')[2].upper()
                t_stamp = str(datetime.datetime.now())

                if t == "openbmp.parsed.unicast_prefix":
                    unicast_prefix = UnicastPrefix(m)
                    logger.debug('Received Message (' + t_stamp + ') : ' +
                                 m_tag + '(V: ' + str(m.version) + ')')
                    logger.debug(unicast_prefix.to_json_pretty())
                    prefix_msg = yaml.safe_load(
                        unicast_prefix.to_json_pretty())
                    for entry in prefix_msg:
                        router_hash = str(entry['router_hash'])
                        # NOTE: retries indefinitely until the router shows up.
                        while router_hash not in self.nodes:
                            logger.debug(
                                "Received prefix message for currently unknown Router, hash="
                                + router_hash)
                            logger.debug(
                                "Let's wait for peer_msg event to be set")
                            self.peerevent.wait(PREFIX_MSG_DAMPENING_TIMER)
                        self.nodes[router_hash].adjInRib.process_msg(entry)

                    # Go ahead and update Redis
                    self.update_redis('AdjInRib')

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the prefix message consumer")
            self.prefix_consumer.close()
            return
        except Exception as e:
            logger.debug(
                "Exception occurred while listening for prefix messages")
            logger.debug("Error is " + str(e))
            self.prefix_consumer.close()
            return
def _produce(data, topic=_CONFIG['topics']['workspace_events']):
    """Publish `data`, JSON-encoded, to `topic` on the configured Kafka server.

    A new producer is created for every call.  After queuing the message the
    function polls for up to 60 seconds so that `_delivery_report` can run.
    """
    print(f"Producing to: {_CONFIG['kafka_server']}")
    settings = {'bootstrap.servers': _CONFIG['kafka_server']}
    client = Producer(settings)
    payload = json.dumps(data)
    client.produce(topic, payload, callback=_delivery_report)
    client.poll(60)
class KafkaProducer(Producer[TPayload]):
    """Producer that returns a ``Future`` for every message it sends.

    A background worker keeps polling the underlying Confluent producer so
    that delivery callbacks fire and resolve those futures.
    """

    def __init__(self, configuration: Mapping[str, Any],
                 codec: Codec[KafkaPayload, TPayload]) -> None:
        self.__configuration = configuration
        self.__codec = codec
        self.__producer = ConfluentProducer(configuration)
        self.__shutdown_requested = Event()

        # Callbacks only fire while something polls the producer.  Polling
        # from a separate thread keeps ``produce(...).result()`` from
        # deadlocking the caller.
        self.__result = execute(self.__worker)

    def __worker(self) -> None:
        """
        Poll until ``close`` has been requested, then flush.

        Polling triggers the delivery callbacks, which in turn resolve the
        ``Future`` objects handed out by ``produce``.  The final flush lets
        in-flight messages complete before the worker exits.
        """
        while True:
            if self.__shutdown_requested.is_set():
                break
            self.__producer.poll(0.1)
        self.__producer.flush()

    def __delivery_callback(
        self,
        future: Future[Message[TPayload]],
        payload: TPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        # Failed delivery: surface the Kafka error through the future.
        if error is not None:
            future.set_exception(TransportError(error))
            return

        try:
            timestamp_type, timestamp_value = message.timestamp()
            if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                raise ValueError("timestamp not available")

            future.set_result(
                Message(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                    payload,
                    datetime.utcfromtimestamp(timestamp_value / 1000.0),
                ))
        except Exception as failure:
            future.set_exception(failure)

    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        """Encode `payload`, queue it for `destination`, and return its future.

        Raises ``RuntimeError`` once the producer has been closed and
        ``TypeError`` when `destination` is neither a topic nor a partition.
        """
        if self.__shutdown_requested.is_set():
            raise RuntimeError("producer has been closed")

        send = self.__producer.produce
        if isinstance(destination, Topic):
            target = {"topic": destination.name}
        elif isinstance(destination, Partition):
            target = {
                "topic": destination.topic.name,
                "partition": destination.index,
            }
        else:
            raise TypeError("invalid destination type")

        encoded = self.__codec.encode(payload)

        future: Future[Message[TPayload]] = Future()
        future.set_running_or_notify_cancel()
        send(
            **target,
            value=encoded.value,
            key=encoded.key,
            headers=encoded.headers,
            on_delivery=partial(self.__delivery_callback, future, payload),
        )
        return future

    def close(self) -> Future[None]:
        """Request shutdown and return the worker's future."""
        self.__shutdown_requested.set()
        return self.__result
try:
    print('Start sending messages ...')
    time_start = int(round(time.time() * 1000))

    # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])

    # ** Demo: fire-and-forget **
    # The result of each produce() call below is never checked.  This gives
    # the highest throughput, but we never learn whether a message was
    # delivered or not.
    for i in range(0, msgCount):
        producer.produce(topicName, key=str(i), value='msg_' + str(i))
        # (Important) poll() lets the client service its internal buffer.
        producer.poll(0)
        if i % 10000 == 0:
            print('Send {} messages'.format(i))

    time_spend = int(round(time.time() * 1000)) - time_start

    print('Send        : ' + str(msgCount) + ' messages to Kafka')
    print('Total spend : ' + str(time_spend) + ' millis-seconds')
    # A short run can finish within the same millisecond; count it as 1 ms so
    # the throughput line is printed instead of raising ZeroDivisionError.
    print('Throughtput : ' + str(msgCount / max(time_spend, 1) * 1000) + ' msg/sec')
except BufferError as e:
    # The local producer queue is full.
    sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(producer))
except Exception as e:
    print(e)
# Per-message delivery report.  poll() and flush() invoke it once a message
# has either been delivered or has permanently failed (after retries).
def delivery_callback(err, msg):
    """Write the delivery outcome of one message to stderr."""
    if not err:
        sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                         (msg.topic(), msg.partition(), msg.offset()))
        return
    sys.stderr.write('%% Message failed delivery: %s\n' % err)


# Each line read from stdin becomes one Kafka message.
for line in sys.stdin:
    # Drop the trailing newline before sending.
    payload = line.rstrip()
    try:
        p.produce(topic, payload, callback=delivery_callback)
    except BufferError:
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                         len(p))

    # Service the delivery callback queue.
    # NOTE: produce() is asynchronous, so this poll() will most likely not
    #       report on the message that was just queued.
    p.poll(0)

# Block until every outstanding message has been delivered.
pending = len(p)
sys.stderr.write('%% Waiting for %d deliveries\n' % pending)
p.flush()
class RiverApiProducer:
    """Thin wrapper around a Kafka ``Producer`` that sends JSON events."""

    def __init__(self, broker=None, callback_function=None):
        """
        Instantiate the class and create the producer object
        :param broker: 'host[:port]' string (or list of 'host[:port]' strings)
            that the producer should contact to bootstrap initial
            cluster metadata
        :param callback_function: fn taking 3 args: err, msg, obj, that is
            called after the event is produced. Default logs the error or
            success
        """
        self.broker = broker
        self.partition = 0
        self.callback_function = (callback_function
                                  if callback_function else self.callback_fn)
        # Create producer
        self.producer = Producer(self._generate_config())

    def _generate_config(self):
        """
        Generate configuration dictionary for the producer
        :return: dict passed to the ``Producer`` constructor
        """
        # NOTE(review): session.timeout.ms looks like a consumer-group
        # setting; kept unchanged - confirm whether the producer needs it.
        config = {"bootstrap.servers": self.broker, "session.timeout.ms": 6000}
        return config

    def produce_event(self, topic, event):
        """
        Produce event in the specified topic
        :param topic: str
        :param event: dict; must be JSON-serializable (dates and datetimes
            are converted to ISO-8601 strings)
        :return: None. Serialization and value errors are logged, not raised.
        """
        try:
            self.producer.produce(
                topic=topic,
                value=json.dumps(event, default=self.default_json_encoder),
                # Bind the event as a default so the callback reports the
                # event it was created for.
                callback=lambda err, msg, obj=event: self.callback_function(
                    err, msg, obj),
            )
            self.producer.poll(1)  # Serve the delivery callback
        except (TypeError, ValueError) as error:
            # TypeError: the event holds a value that cannot be serialized.
            logger.error(error)

    @staticmethod
    def default_json_encoder(o):
        """
        Json Encoder for date and datetime
        :param o: object the json module could not serialize itself
        :return: ISO-8601 string
        :raises TypeError: for any other type, so that unsupported values
            are reported instead of being silently written as ``null``
        """
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        raise TypeError("Object of type {} is not JSON serializable".format(
            type(o).__name__))

    @staticmethod
    def callback_fn(err, msg, obj):
        """
        Handle delivery reports served from producer.poll.
        This callback takes an extra argument, obj.
        This allows the original contents to be included for debugging purposes.
        """
        if err is not None:
            logger.debug(
                "Message {} delivery failed with error {} for topic {}".format(
                    obj, err, msg.topic()))
        else:
            logger.debug("Event Successfully created")
'bootstrap.servers': "kafka:2181", 'group.id': "json_producer" }) time.sleep(10) def delivery_callback (err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d]\n' % \ (msg.topic(), msg.partition())) for tweet in get_tweet('examples/tweets-200k.txt.gz'): # if len(tweet['entities']['urls']) > 0 and \ # any(tweet['lang'] in l for l in ['es', 'en']): try: print("%s: %s" % (tweet['user']['screen_name'], tweet['text'])) kfk.produce( "raw_tweets", json.dumps(tweet), callback=delivery_callback ) kfk.poll(0) kfk.flush() except BufferError as e: sys.stderr.write('%% Local producer queue is full ' \ '(%d messages awaiting delivery): try again\n' % len(kfk))
from config import Config
from confluent_kafka import Producer


def acked(err, msg):
    """Delivery report: print whether the message was produced or failed."""
    if err is None:
        # msg.value() is the raw binary payload
        print("Message produced: {0}".format(msg.value()))
        return
    print("Failed to deliver message: {0}: {1}".format(
        msg.value(), err.str()))


settings = {"bootstrap.servers": Config.MY_SERVER}
p = Producer(settings)

try:
    # Send "value #1" .. "value #4", serving callbacks after each one.
    for val in range(1, 5):
        p.produce(Config.TOPIC_ID,
                  "value #{0}".format(val),
                  callback=acked)
        p.poll(0.5)
except KeyboardInterrupt:
    pass

# Give outstanding messages up to 100 seconds to be delivered.
p.flush(100)

# kafka-console-consumer --bootstrap-server localhost:9092 --topic first-topic
def test_ingester(self):
    """End-to-end ingester test against a locally started Kafka.

    Steps, in order: start ZooKeeper and a Kafka server from the configured
    Kafka installation, prepare a dated ZTF test topic, push every ``*.avro``
    alert found in the local alert directory to it, run ``ingester`` on that
    date in test mode, then remove the test filter and stop both services.
    """
    print(f'{time_stamp()}: Setting up paths')
    # NOTE(review): path_kafka is not used below; commands are built from
    # config['path']['kafka'] directly.
    path_kafka = pathlib.Path(config['path']['kafka'])

    path_logs = pathlib.Path(config['path']['logs'])
    if not path_logs.exists():
        path_logs.mkdir(parents=True, exist_ok=True)

    print(f'{time_stamp()}: Setting up test program in Fritz')
    program = Program(group_name="FRITZ_TEST", group_nickname="Fritz")

    # clean up old Kafka logs
    print(f'{time_stamp()}: Cleaning up Kafka logs')
    subprocess.run([
        'rm',
        '-rf',
        path_logs / "kafka-logs",
        "/tmp/zookeeper"
    ])

    print(f'{time_stamp()}: Starting up ZooKeeper at localhost:2181')

    # start ZooKeeper in the background
    cmd_zookeeper = [os.path.join(config['path']['kafka'], 'bin', 'zookeeper-server-start.sh'),
                     '-daemon',
                     os.path.join(config['path']['kafka'], 'config', 'zookeeper.properties')]

    with open(path_logs / 'zookeeper.stdout', 'w') as stdout_zookeeper:
        p_zookeeper = subprocess.run(cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT)

    # take a nap while it fires up
    time.sleep(3)

    print(f'{time_stamp()}: Starting up Kafka Server at localhost:9092')

    # start the Kafka server:
    cmd_kafka_server = [os.path.join(config['path']['kafka'], 'bin', 'kafka-server-start.sh'),
                        '-daemon',
                        os.path.join(config['path']['kafka'], 'config', 'server.properties')]

    # NOTE(review): the log file is opened here, but the run() call below
    # does not redirect its output into it.
    with open(os.path.join(config['path']['logs'], 'kafka_server.stdout'), 'w') as stdout_kafka_server:
        # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
        p_kafka_server = subprocess.run(cmd_kafka_server)

    # take a nap while it fires up
    time.sleep(3)

    # get kafka topic names with kafka-topics command
    cmd_topics = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                  '--zookeeper', config['kafka']['zookeeper.test'],
                  '-list']

    # one topic name per output line; the empty piece after the last newline is dropped
    topics = subprocess.run(cmd_topics, stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')[:-1]
    print(f'{time_stamp()}: Found topics: {topics}')

    # create a test ZTF topic for the current UTC date
    date = datetime.datetime.utcnow().strftime("%Y%m%d")
    topic_name = f'ztf_{date}_programid1_test'

    if topic_name in topics:
        # topic previously created? remove first
        cmd_remove_topic = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                            '--zookeeper', config['kafka']['zookeeper.test'],
                            '--delete', '--topic', topic_name]
        # print(kafka_cmd)
        remove_topic = subprocess.run(cmd_remove_topic,
                                      stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')[:-1]
        print(f'{time_stamp()}: {remove_topic}')
        print(f'{time_stamp()}: Removed topic: {topic_name}')
        time.sleep(1)

    # NOTE(review): `topics` is not refreshed after the removal above, so a
    # topic that was just deleted is still listed and is not re-created here.
    # Confirm whether the test relies on broker-side auto-creation.
    if topic_name not in topics:
        print(f'{time_stamp()}: Creating topic {topic_name}')

        cmd_create_topic = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                            "--create",
                            "--bootstrap-server", config['kafka']['bootstrap.test.servers'],
                            "--replication-factor", "1",
                            "--partitions", "1",
                            "--topic", topic_name]
        with open(os.path.join(config['path']['logs'], 'create_topic.stdout'), 'w') as stdout_create_topic:
            p_create_topic = subprocess.run(cmd_create_topic, stdout=stdout_create_topic, stderr=subprocess.STDOUT)

    print(f'{time_stamp()}: Starting up Kafka Producer')

    # spin up Kafka producer
    producer = Producer({'bootstrap.servers': config['kafka']['bootstrap.test.servers']})

    # small number of alerts that come with kowalski
    path_alerts = pathlib.Path('/app/data/ztf_alerts/20200202/')
    # grab some more alerts from gs://ztf-fritz/sample-public-alerts
    # (the listing is best-effort: any failure is printed and `ids` is emptied;
    # `ids` is only used for the count printed below)
    try:
        print(f'{time_stamp()}: Grabbing more alerts from gs://ztf-fritz/sample-public-alerts')
        r = requests.get('https://www.googleapis.com/storage/v1/b/ztf-fritz/o')
        aa = r.json()['items']
        ids = [pathlib.Path(a['id']).parent for a in aa if 'avro' in a['id']]
    except Exception as e:
        print(f'{time_stamp()}: Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok')
        print(f'{time_stamp()}: {e}')
        ids = []

    # copy the sample alerts next to the bundled ones
    subprocess.run([
        "gsutil", "-m", "cp", "-n",
        "gs://ztf-fritz/sample-public-alerts/*.avro",
        "/app/data/ztf_alerts/20200202/"
    ])
    print(f'{time_stamp()}: Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts')

    # push!
    for p in path_alerts.glob('*.avro'):
        with open(str(p), 'rb') as data:
            # Trigger any available delivery report callbacks from previous produce() calls
            producer.poll(0)

            print(f'{time_stamp()}: Pushing {p}')

            # Asynchronously produce a message, the delivery report callback
            # will be triggered from poll() above, or flush() below, when the message has
            # been successfully delivered or failed permanently.
            producer.produce(topic_name, data.read(), callback=delivery_report)

    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered.
    producer.flush()

    print(f'{time_stamp()}: Creating a test filter')
    test_filter = Filter(
        collection='ZTF_alerts',
        group_id=program.group_id,
        filter_id=program.filter_id
    )

    print(f'{time_stamp()}: Starting up Ingester')

    # digest and ingest
    ingester(obs_date=date, test=True)
    print(f'{time_stamp()}: Digested and ingested: all done!')

    # shut down Kafka server and ZooKeeper
    time.sleep(10)

    print(f'{time_stamp()}: Removing the test filter')
    test_filter.remove()

    print(f'{time_stamp()}: Shutting down Kafka Server at localhost:9092')
    # stop the Kafka server:
    cmd_kafka_server_stop = [os.path.join(config['path']['kafka'], 'bin', 'kafka-server-stop.sh'),
                             os.path.join(config['path']['kafka'], 'config', 'server.properties')]

    # opening with 'w' truncates the server log written earlier
    with open(os.path.join(config['path']['logs'], 'kafka_server.stdout'), 'w') as stdout_kafka_server:
        p_kafka_server_stop = subprocess.run(cmd_kafka_server_stop,
                                             stdout=stdout_kafka_server, stderr=subprocess.STDOUT)

    print(f'{time_stamp()}: Shutting down ZooKeeper at localhost:2181')
    # stop ZooKeeper (argument list, no shell=True, for safety)
    cmd_zookeeper_stop = [os.path.join(config['path']['kafka'], 'bin', 'zookeeper-server-stop.sh'),
                          os.path.join(config['path']['kafka'], 'config', 'zookeeper.properties')]

    with open(os.path.join(config['path']['logs'], 'zookeeper.stdout'), 'w') as stdout_zookeeper:
        p_zookeeper_stop = subprocess.run(cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT)
def _fill_dimensions(data, nb_dimensions, labels, types, values, index):
    """Write the dimension fields of asset number `index` into `data`."""
    for key in range(nb_dimensions):
        dim = "d_" + str(key)
        dim_values = values.get(dim)
        dim_label = labels.get(dim)
        dim_type = types.get(dim)
        if dim_type == "fixed":
            # one predefined value per asset
            data[dim_label] = dim_values[index]
        elif dim_type == "high_cardinality":
            # here the configured value is an integer upper bound
            data[dim_label] = dim_label + "_" + str(random.randint(0, dim_values + 1))
        elif dim_type == "random":
            data[dim_label] = random.choice(dim_values)


def generate(config, asset_0, asset_1, interval_ms, inject_error, devmode, destination):
    """generate data and send it to a Kafka broker

    Runs until interrupted.  Every cycle takes one timestamp, emits one
    record per (asset_0, asset_1) pair, then sleeps for `interval_ms`.

    :param config: dict with optional "kafka" (brokers, topic) and "file"
        (filepath) sections
    :param asset_0: dict describing the outer assets (label, assets,
        dimensions, dimension_labels / dimension_types / dimension_values)
    :param asset_1: dict describing the inner assets; as asset_0 plus
        metrics, metrics_values and metrics_labels
    :param interval_ms: pause between cycles, in milliseconds
    :param inject_error: the string 'true' switches on the abnormal values
    :param devmode: when truthy, records are printed instead of published
    :param destination: "kafka" or "file" (ignored in devmode)
    """
    interval_secs = interval_ms / 1000.0
    random.seed()

    producer = None
    topic = None
    destination_file = None
    if not devmode:
        if destination == "kafka":
            # prepare Kafka connection
            kafka_config = config.get("kafka", {})
            brokers = kafka_config.get("brokers", "localhost:9092")
            topic = kafka_config.get("topic", "simulator")
            kafkaconf = {'bootstrap.servers': brokers, 'client.id': socket.gethostname()}
            producer = Producer(kafkaconf)
        elif destination == "file":
            file_config = config.get("file", {})
            filepath = file_config.get("filepath", "output.json")
            destination_file = open(filepath, 'w+')

    try:
        # extract assets dimensions details.  Counts are coerced to int and
        # lookup tables default to empty dicts, so a missing key no longer
        # breaks range() or .get() further down.
        asset_0_label = asset_0.get("label", "asset_0")
        asset_0_nb_assets = int(asset_0.get("assets", 3))
        asset_0_nb_dimensions = int(asset_0.get("dimensions", 3))
        asset_0_dimensions_labels = asset_0.get("dimension_labels", {})
        asset_0_dimensions_types = asset_0.get("dimension_types", {})
        asset_0_dimensions_values = asset_0.get("dimension_values", {})

        asset_1_label = asset_1.get("label", "asset_1")
        asset_1_nb_assets = int(asset_1.get("assets", 3))
        asset_1_nb_dimensions = int(asset_1.get("dimensions", 3))
        asset_1_dimensions_labels = asset_1.get("dimension_labels", {})
        asset_1_dimensions_types = asset_1.get("dimension_types", {})
        asset_1_dimensions_values = asset_1.get("dimension_values", {})
        asset_1_nb_metrics = int(asset_1.get("metrics", 3))
        asset_1_metrics_values = asset_1.get("metrics_values") or {}
        asset_1_metrics_labels = asset_1.get("metrics_labels") or {}

        while True:
            data = {
                "timestamp": int(time.time() * 1000000)
            }

            for a0 in range(asset_0_nb_assets):
                # GENERIC: generate asset_0 IDs and dimensions
                data[asset_0_label + "_id"] = asset_0_label + "_" + str(a0)
                _fill_dimensions(data, asset_0_nb_dimensions,
                                 asset_0_dimensions_labels,
                                 asset_0_dimensions_types,
                                 asset_0_dimensions_values, a0)

                for a1 in range(asset_1_nb_assets):
                    # GENERIC: generate asset_1 IDs and dimensions
                    data[asset_1_label + "_id"] = asset_1_label + "_" + str(a0) + "_" + str(a1)
                    _fill_dimensions(data, asset_1_nb_dimensions,
                                     asset_1_dimensions_labels,
                                     asset_1_dimensions_types,
                                     asset_1_dimensions_values, a1)

                    # GENERIC: generate metrics
                    for key in range(asset_1_nb_metrics):
                        bounds = asset_1_metrics_values.get("m_" + str(key))
                        label = asset_1_metrics_labels.get("m_" + str(key))
                        if bounds is None or label is None:
                            # metric not described in the configuration
                            continue
                        min_val, max_val = bounds
                        data[label] = random.randint(min_val, max_val)

                    # Custom: Implement your abnormal behavior here ->
                    if inject_error == 'true':
                        data["discount"] = random.randint(20, 25)
                        data["quantity"] = random.randint(1, 99)
                    # -> end of abnormal behavior

                    # GENERIC: publish the data
                    if devmode:
                        print(json.dumps(data, indent=4), flush=True)
                    elif destination == "kafka":
                        producer.produce(topic, key=data[asset_0_label + "_id"], value=json.dumps(data))
                        producer.poll(0)
                    elif destination == "file":
                        destination_file.write(json.dumps(data) + '\n')

            time.sleep(interval_secs)
    finally:
        # The loop only ends through an exception (Ctrl-C included): release
        # the output file and give queued Kafka messages a chance to go out.
        if destination_file is not None:
            destination_file.close()
        if producer is not None:
            producer.flush(10)
class ProducerServer:
    """Read JSON records from a file and produce them to one Kafka topic.

    :param input_file: path of a file holding a JSON array of records
    :param topic_name: topic to produce to (created on demand)
    :param kwargs: must contain ``bootstrap_servers`` and ``client_id``
    """

    def __init__(self, input_file: str, topic_name: str, **kwargs):
        self.p = Producer({
            'bootstrap.servers': kwargs['bootstrap_servers'],
            'client.id': kwargs['client_id']
        })
        self.input_file = input_file
        self.topic_name = topic_name
        self.client = AdminClient(
            {'bootstrap.servers': kwargs['bootstrap_servers']})

    def check_topic_exists(self) -> bool:
        """Checks if the given topic exists in Kafka."""
        topic_metadata = self.client.list_topics(timeout=5)
        return any(t.topic == self.topic_name
                   for t in topic_metadata.topics.values())

    def create_topics(self):
        """Create kafka topic."""
        new_topics = [
            NewTopic(self.topic_name, num_partitions=3, replication_factor=1)
        ]
        result = self.client.create_topics(new_topics)
        for topic, f in result.items():
            try:
                f.result()  # The result itself is None
                logger.debug(f"Topic {topic} created")
            except Exception as e:
                logger.error(f"Failed to create topic {topic}: {e}")

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            logger.debug('Message delivery failed: {}'.format(err))
        else:
            logger.debug('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))  # noqa

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return str(round(time.time() * 1000))

    def produce(self, message):
        """Produce record to kafka, retrying while the local queue is full."""
        logger.debug(f'message: {message}')
        while True:
            try:
                self.p.produce(self.topic_name,
                               key=self.time_millis(),
                               value=self.dict_to_binary(message),
                               on_delivery=self.delivery_report)
                self.p.poll(0)
                break
            except BufferError as e:
                logger.error(e)
                # Serve callbacks for up to a second to free queue space,
                # then try the same message again.
                self.p.poll(1)

    # TODO we're generating a dummy data
    def generate_data(self):
        """Ensure the topic exists, then produce every record of the input file."""
        # check if topic is exist or create
        if not self.check_topic_exists():
            self.create_topics()
        else:
            logger.debug(f'topic {self.topic_name} alreadly exist!')

        # read data from file
        with open(self.input_file, 'r') as f:
            messages = json.load(f)

        # TODO send the correct data
        try:
            for message in messages:
                self.produce(message)
        except KeyboardInterrupt:
            logger.debug('aborted by user.')
        finally:
            # always deliver what has already been queued
            self.p.flush()

    def dict_to_binary(self, json_dict):
        """Json dictionary to binary: the UTF-8 encoded JSON document."""
        return json.dumps(json_dict).encode('utf-8')
from confluent_kafka import Producer
import requests

# Producer configuration, given as a dict.
p = Producer({
    'bootstrap.servers':
        '192.168.1.88:19092,192.168.1.88:29092,192.168.1.88:39092'
})


def delivery_report(err, msg):
    """Delivery callback: print the delivery result of one message.

    Args:
        err: Delivery error, or None when the message was delivered.
        msg: The message the report refers to.
    """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


# Send the numbers 0..29 as string payloads to partition 1 of topic 'test'.
for data in range(30):
    p.produce('test', str(data), partition=1, callback=delivery_report)
    # Serve delivery callbacks that are already available without blocking;
    # a positive timeout here would stall the loop on every message.
    p.poll(0)

# Wait for all outstanding messages to be delivered (or to fail).
p.flush()
class KafkaDestination(object):
    """
    syslog-ng Apache Kafka destination.

    `init()` turns the destination options into a producer configuration,
    `open()` creates the producer, `send()` publishes one message,
    `close()` flushes the producer and `deinit()` drops it.
    """

    _kafka_producer = None

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None
        # Producer configuration is kept per instance: a dict stored on the
        # class would be shared, and mutated, by every destination.
        self._conf = {}

    def init(self, args):
        """
        This method is called at initialization time.

        Builds the producer configuration from the destination options in
        `args`. `hosts` and `topic` are mandatory; `producer_config`,
        `msg_key`, `partition`, `programs`, `group_id`, `broker_version`,
        `verbose` and `display_stats` are optional.

        Should return False if initialization fails.
        """
        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except (ValueError, SyntaxError, TypeError):
                # literal_eval raises SyntaxError on malformed input and
                # dict.update raises TypeError/ValueError on non-mappings.
                LOG.error("Given config %s is not in a Python dict format.",
                          args['producer_config'])

        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s", self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s", self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s", self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s", self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            major_minor = '.'.join(self.broker_version.split('.')[:2])
            if major_minor in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s", self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf[
                'broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warning("Default broker version fallback %s "
                        "will be applied here.",
                        DEFAULT_BROKER_VERSION_FALLBACK)

        # provide a global `on_delivery` callback in the `Producer()` config
        # dict better for memory consumptions vs per message callback.
        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info("Initialization of Kafka Python driver w/ args=%s",
                 self._conf)
        return True

    def open(self):
        """
        Open a connection to the Kafka service.

        Should return False if initialization fails.
        """
        LOG.info("Opening connection to the remote Kafka services at %s",
                 self.hosts)
        try:
            self._kafka_producer = Producer(**self._conf)
        except KafkaException as e:
            # Report the failure through the return value, as documented,
            # instead of letting the exception escape.
            LOG.error("Unable to create the Kafka producer: %s", e)
            return False
        return True

    def is_opened(self):
        """
        Check if the connection to Kafka is able to receive messages.

        Should return False if target is not open.
        """
        return self._kafka_producer is not None

    def close(self):
        """
        Close the connection to the Kafka service.

        Flushes the producer, waiting at most 30 seconds.
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """
        This method is called at deinitialization time.

        Drops the reference to the producer.
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """
        Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        :return: True or False
        """
        # do nothing if msg is empty
        if not ro_msg:
            return True

        if isinstance(ro_msg, dict):
            LOG.warning("You are using `values-pair` if you are using "
                        "syslog-ng <= 3.11 it is known to be leaking...")
            # Work on a copy: the fields rewritten below must not leak back
            # into the caller's dict.
            msg = dict(ro_msg)
        else:
            # no syslog-ng `values-pair` here we dealing with `LogMessage`
            # syslog-ng `LogMessage` is read-only
            # goal is rfc5424 we cannot use values-pair because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY,
                       'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST,
                       'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE,
                       'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False

        try:
            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None and msg_program not in self.programs:
                # filtered out: notify of success
                return True

            if msg_program == 'firewall':
                msg['MESSAGE'] = parse_firewall_msg(msg['MESSAGE'])
            elif msg_program == 'nat':
                msg['MESSAGE'] = parse_nat_msg(msg['MESSAGE'])

            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg:
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignore partition=%s because it is not an int.",
                        self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...  "
                      "See details: %s", e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True